Skip to content

Commit 6d4e65f

Browse files
authored
Merge pull request #300 from cmu-delphi/development
Development
2 parents 619e3c5 + d493bd9 commit 6d4e65f

File tree

5 files changed

+115
-98
lines changed

5 files changed

+115
-98
lines changed
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
"""
2+
Logging formatters for Elasticsearch-compatible structured output.
3+
"""
4+
5+
import json
6+
import logging
7+
from datetime import datetime, timezone
8+
9+
10+
# Standard LogRecord attributes - excluded from extra when building JSON
11+
_RECORD_ATTRS = frozenset(
12+
{
13+
"name",
14+
"msg",
15+
"args",
16+
"created",
17+
"filename",
18+
"funcName",
19+
"levelname",
20+
"levelno",
21+
"lineno",
22+
"module",
23+
"msecs",
24+
"pathname",
25+
"process",
26+
"processName",
27+
"relativeCreated",
28+
"stack_info",
29+
"exc_info",
30+
"exc_text",
31+
"thread",
32+
"threadName",
33+
"message",
34+
"asctime",
35+
"taskName",
36+
}
37+
)
38+
39+
40+
class JsonFormatter(logging.Formatter):
41+
"""
42+
Format log records as single-line JSON for Elasticsearch.
43+
44+
Each log line is a valid JSON object with @timestamp, level, logger, message,
45+
and any extra fields passed via logger.info(..., extra={...}).
46+
"""
47+
48+
def format(self, record: logging.LogRecord) -> str:
49+
log_obj = {
50+
"@timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z",
51+
"level": record.levelname,
52+
"logger": record.name,
53+
"message": record.getMessage(),
54+
}
55+
56+
# Add extra fields (passed via extra= in log calls)
57+
for key, value in record.__dict__.items():
58+
if key not in _RECORD_ATTRS and value is not None:
59+
log_obj[key] = value
60+
61+
# Add exception info if present
62+
if record.exc_info:
63+
log_obj["exception"] = self.formatException(record.exc_info)
64+
65+
return json.dumps(log_obj, default=str)

src/epiportal/middleware.py

Lines changed: 9 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,16 @@
22
Request logging middleware that captures comprehensive data from all HTTP requests.
33
"""
44

5-
import json
6-
import logging
75
import time
86
import uuid
97
from typing import Any
108

9+
from delphi_utils import get_structured_logger
1110
from django.utils.deprecation import MiddlewareMixin
1211

13-
logger = logging.getLogger("epiportal.requests")
12+
from epiportal.utils import get_client_ip
13+
14+
logger = get_structured_logger("epiportal.requests")
1415

1516
# Headers that may contain sensitive data - values will be redacted
1617
SENSITIVE_HEADERS = frozenset(
@@ -26,14 +27,11 @@
2627
)
2728
)
2829

29-
# Maximum bytes of request/response body to log (prevents log bloat)
30-
MAX_BODY_LOG_SIZE = 4096
31-
3230
# Path segments to exclude from request logging (matched anywhere in path)
3331
LOG_EXCLUDE_PATH_PATTERNS = (
34-
"get_table_stats_info",
35-
"get_related_indicators",
36-
"get_available_geos",
32+
# "get_table_stats_info",
33+
# "get_related_indicators",
34+
# "get_available_geos",
3735
)
3836

3937

@@ -43,15 +41,6 @@ def _should_log_request(request) -> bool:
4341
return not any(pattern in path for pattern in LOG_EXCLUDE_PATH_PATTERNS)
4442

4543

46-
def _get_client_ip(request) -> str:
47-
"""Extract client IP, respecting X-Forwarded-For when behind proxies."""
48-
x_forwarded_for = request.META.get("HTTP_X_FORWARDED_FOR")
49-
if x_forwarded_for:
50-
# Take the leftmost (original client) IP
51-
return x_forwarded_for.split(",")[0].strip()
52-
return request.META.get("REMOTE_ADDR", "")
53-
54-
5544
def _sanitize_headers(meta: dict) -> dict[str, str]:
5645
"""Extract HTTP headers from META, redacting sensitive ones."""
5746
headers = {}
@@ -65,19 +54,6 @@ def _sanitize_headers(meta: dict) -> dict[str, str]:
6554
return headers
6655

6756

68-
def _safe_body_preview(body: bytes | None, max_size: int = MAX_BODY_LOG_SIZE) -> str | None:
69-
"""Return a safe preview of request/response body, truncated if large."""
70-
if body is None or len(body) == 0:
71-
return None
72-
try:
73-
decoded = body.decode("utf-8", errors="replace")
74-
if len(decoded) > max_size:
75-
return decoded[:max_size] + f"... [truncated, total {len(body)} bytes]"
76-
return decoded
77-
except Exception:
78-
return f"[binary, {len(body)} bytes]"
79-
80-
8157
class RequestLoggingMiddleware(MiddlewareMixin):
8258
"""
8359
Middleware that logs every HTTP request with comprehensive request and response data.
@@ -111,7 +87,7 @@ def process_response(self, request, response):
11187
"full_uri": request.build_absolute_uri(),
11288
"query_string": request.META.get("QUERY_STRING") or "",
11389
"query_params": dict(request.GET) if request.GET else {},
114-
"client_ip": _get_client_ip(request),
90+
"client_ip": get_client_ip(request),
11591
"referer": request.META.get("HTTP_REFERER", ""),
11692
"user_agent": request.META.get("HTTP_USER_AGENT", ""),
11793
"content_type": request.content_type or "",
@@ -128,31 +104,10 @@ def process_response(self, request, response):
128104
else:
129105
log_data["user"] = "anonymous"
130106

131-
# Request body (for methods that typically have one)
132-
if request.method in ("POST", "PUT", "PATCH"):
133-
try:
134-
body = getattr(request, "_body", None) or request.body
135-
log_data["request_body_preview"] = _safe_body_preview(body)
136-
except Exception as e:
137-
log_data["request_body_error"] = str(e)
138-
139-
# Response body (for API responses, avoid huge HTML)
140-
if (
141-
"application/json" in (response.get("Content-Type") or "")
142-
and hasattr(response, "content")
143-
):
144-
log_data["response_body_preview"] = _safe_body_preview(response.content)
145-
146107
if duration_ms is not None:
147108
log_data["duration_ms"] = round(duration_ms, 2)
148109

149-
logger.info(
150-
"%s %s %s | %s",
151-
request.method,
152-
request.path,
153-
response.status_code,
154-
json.dumps(log_data, default=str),
155-
)
110+
logger.info("request", **log_data)
156111

157112
except Exception as e:
158113
logger.exception("Error in request logging middleware: %s", e)

src/epiportal/settings.py

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -244,29 +244,20 @@
244244
'formatter': 'simple',
245245
'stream': sys.stdout,
246246
},
247-
'request_console': {
248-
'class': 'logging.StreamHandler',
249-
'formatter': 'request_verbose',
250-
'stream': sys.stdout,
251-
},
252247
},
253248
'loggers': {
254249
'django': {
255250
'handlers': ['console'],
256251
'level': 'INFO',
257252
'propagate': True,
258253
},
259-
'epiportal.requests': {
260-
'handlers': ['request_console'],
261-
'level': 'INFO',
262-
'propagate': False,
263-
},
264254
},
265255
}
266256

267257
if DEBUG:
268258
for logger in LOGGING['loggers']:
269-
LOGGING['loggers'][logger]['handlers'] = ['console']
259+
if logger != 'epiportal.requests':
260+
LOGGING['loggers'][logger]['handlers'] = ['console']
270261

271262

272263
# DRF Spectacular settings

src/epiportal/utils.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
"""
2+
Shared utilities for epiportal.
3+
"""
4+
5+
from django.conf import settings
6+
7+
8+
def get_client_ip(request) -> str:
9+
"""
10+
Extract the real client IP from a request, respecting X-Forwarded-For when behind proxies.
11+
12+
Use this everywhere you need the client IP (logging, rate limiting, etc.) to ensure
13+
consistent behavior. Requires PROXY_DEPTH to be set correctly for your production
14+
proxy chain (e.g. 2 for AWS ALB + nginx).
15+
"""
16+
if not settings.REVERSE_PROXY_DEPTH:
17+
return request.META.get("REMOTE_ADDR", "")
18+
19+
x_forwarded_for = request.META.get("HTTP_X_FORWARDED_FOR")
20+
if x_forwarded_for:
21+
full_proxy_chain = [s.strip() for s in x_forwarded_for.split(",")]
22+
depth = (
23+
settings.REVERSE_PROXY_DEPTH
24+
if settings.REVERSE_PROXY_DEPTH > 0
25+
else len(full_proxy_chain)
26+
)
27+
trusted = full_proxy_chain[-depth:] if depth else full_proxy_chain
28+
if trusted:
29+
return trusted[0]
30+
31+
x_real_ip = request.META.get("HTTP_X_REAL_IP")
32+
if x_real_ip:
33+
return x_real_ip.strip()
34+
35+
return request.META.get("REMOTE_ADDR", "")

src/indicatorsets/utils.py

Lines changed: 4 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,9 @@
66

77
import requests
88
from django.conf import settings
9-
from django.http import JsonResponse
109
from django.core.cache import cache
10+
from django.http import JsonResponse
11+
from epiportal.utils import get_client_ip
1112
from epiweeks import Week
1213
from delphi_utils import get_structured_logger
1314

@@ -681,36 +682,6 @@ def generate_query_code_flusurv(flusurv_geos, start_date, end_date):
681682
return python_code_blocks, r_code_blocks
682683

683684

684-
def get_real_ip_addr(req): # `req` should be a Flask.request object
685-
if settings.REVERSE_PROXY_DEPTH:
686-
# we only expect/trust (up to) "REVERSE_PROXY_DEPTH" number of proxies between this server and the outside world.
687-
# a REVERSE_PROXY_DEPTH of 0 means not proxied, i.e. server is globally directly reachable.
688-
# a negative proxy depth is a special case to trust the whole chain -- not generally recommended unless the
689-
# most-external proxy is configured to disregard "X-Forwarded-For" from outside.
690-
# really, ONLY trust the following headers if reverse proxied!!!
691-
x_forwarded_for = req.META.get("HTTP_X_FORWARDED_FOR")
692-
693-
if x_forwarded_for:
694-
full_proxy_chain = x_forwarded_for.split(",")
695-
# eliminate any extra addresses at the front of this list, as they could be spoofed.
696-
if settings.REVERSE_PROXY_DEPTH > 0:
697-
depth = settings.REVERSE_PROXY_DEPTH
698-
else:
699-
# special case for -1/negative: setting `depth` to 0 will not strip any items from the chain
700-
depth = 0
701-
trusted_proxy_chain = full_proxy_chain[-depth:]
702-
# accept the first (or only) address in the remaining trusted part of the chain as the actual remote address
703-
return trusted_proxy_chain[0].strip()
704-
705-
# fall back to "X-Real-Ip" if "X-Forwarded-For" isnt present
706-
x_real_ip = req.META.get("HTTP_X_REAL_IP")
707-
if x_real_ip:
708-
return x_real_ip
709-
710-
# if we are not proxied (or we are proxied but the headers werent present and we fell through to here), just use the remote ip addr as the true client address
711-
return req.META.get("REMOTE_ADDR")
712-
713-
714685
def log_form_stats(request, data, form_mode):
715686
log_data = {
716687
"form_mode": form_mode,
@@ -729,7 +700,7 @@ def log_form_stats(request, data, form_mode):
729700
),
730701
"api_key_used": bool(data.get("api_key")),
731702
"api_key": data.get("api_key", "Not provided"),
732-
"user_ip": get_real_ip_addr(request),
703+
"user_ip": get_client_ip(request),
733704
"user_ga_id": data.get("clientId", "Not available"),
734705
}
735706

@@ -805,7 +776,7 @@ def log_form_data(request, data, form_mode):
805776
"epiweeks": get_epiweek(data.get("start_date", ""), data.get("end_date", "")) if data.get("start_date") and data.get("end_date") else [], # fmt: skip
806777
"api_key_used": bool(data.get("apiKey")),
807778
"api_key": data.get("apiKey", "Not provided"),
808-
"user_ip": get_real_ip_addr(request),
779+
"user_ip": get_client_ip(request),
809780
"user_ga_id": data.get("clientId", "Not available"),
810781
}
811782
form_data_logger.info("form_data", **log_data)

0 commit comments

Comments
 (0)