Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 6 additions & 36 deletions src/epiportal/middleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
from typing import Any

from django.utils.deprecation import MiddlewareMixin
from django.conf import settings

from epiportal.utils import get_client_ip

logger = logging.getLogger("epiportal.requests")

Expand All @@ -32,9 +33,9 @@

# Path segments to exclude from request logging (matched anywhere in path)
# Substrings checked by _should_log_request: a request is not logged when any
# of them occurs in the path that function inspects.
LOG_EXCLUDE_PATH_PATTERNS = (
"get_table_stats_info",
"get_related_indicators",
"get_available_geos",
# NOTE(review): the commented-out entries below repeat the active entries above;
# presumably only one of the two sets is meant to remain -- confirm which.
# "get_table_stats_info",
# "get_related_indicators",
# "get_available_geos",
)


Expand All @@ -44,37 +45,6 @@ def _should_log_request(request) -> bool:
return not any(pattern in path for pattern in LOG_EXCLUDE_PATH_PATTERNS)


def _get_client_ip(req) -> str:
    """Extract client IP, respecting X-Forwarded-For when behind proxies.

    ``settings.REVERSE_PROXY_DEPTH`` controls how much of the forwarding
    headers is trusted:

    * ``0``: not proxied (server is directly reachable); forwarding headers
      are ignored and ``REMOTE_ADDR`` is used.
    * ``N > 0``: at most ``N`` proxies sit between this server and the outside
      world, so only the last ``N`` entries of ``X-Forwarded-For`` are trusted.
    * negative: trust the whole chain -- not generally recommended unless the
      most-external proxy discards ``X-Forwarded-For`` sent from outside.

    Args:
        req: Request object exposing a ``META`` mapping of CGI-style headers.

    Returns:
        The client address with surrounding whitespace removed, or ``""``
        when no address can be determined.
    """
    meta = req.META

    # ONLY trust the forwarding headers when we are actually reverse proxied.
    if settings.REVERSE_PROXY_DEPTH:
        x_forwarded_for = meta.get("HTTP_X_FORWARDED_FOR")
        if x_forwarded_for:
            full_proxy_chain = x_forwarded_for.split(",")
            # Drop extra addresses at the front of the list, as they could be
            # spoofed. A negative depth maps to 0, and ``chain[-0:]`` is the
            # whole chain, so nothing is stripped in that special case.
            depth = max(settings.REVERSE_PROXY_DEPTH, 0)
            trusted_proxy_chain = full_proxy_chain[-depth:]
            # The first (or only) address in the trusted part of the chain is
            # the actual remote address; ignore it if it is blank (malformed
            # header) and fall through to the other sources instead.
            client_ip = trusted_proxy_chain[0].strip()
            if client_ip:
                return client_ip

        # Fall back to "X-Real-Ip" if "X-Forwarded-For" is not usable.
        x_real_ip = (meta.get("HTTP_X_REAL_IP") or "").strip()
        if x_real_ip:
            return x_real_ip

    # Not proxied (or proxied but the headers were missing): use the socket
    # peer address. Default to "" so the declared ``str`` return type holds.
    return meta.get("REMOTE_ADDR", "")


def _sanitize_headers(meta: dict) -> dict[str, str]:
"""Extract HTTP headers from META, redacting sensitive ones."""
headers = {}
Expand Down Expand Up @@ -134,7 +104,7 @@ def process_response(self, request, response):
"full_uri": request.build_absolute_uri(),
"query_string": request.META.get("QUERY_STRING") or "",
"query_params": dict(request.GET) if request.GET else {},
"client_ip": _get_client_ip(request),
"client_ip": get_client_ip(request),
"referer": request.META.get("HTTP_REFERER", ""),
"user_agent": request.META.get("HTTP_USER_AGENT", ""),
"content_type": request.content_type or "",
Expand Down
35 changes: 35 additions & 0 deletions src/epiportal/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
"""
Shared utilities for epiportal.
"""

from django.conf import settings


def get_client_ip(request) -> str:
    """
    Extract the real client IP from a request, respecting X-Forwarded-For when behind proxies.

    Use this everywhere you need the client IP (logging, rate limiting, etc.) to ensure
    consistent behavior. Requires settings.REVERSE_PROXY_DEPTH to be set correctly for your
    production proxy chain (e.g. 2 for AWS ALB + nginx):

    * 0: not proxied; forwarding headers are ignored and REMOTE_ADDR is returned.
    * N > 0: only the last N entries of X-Forwarded-For are trusted; the first of
      those is taken as the client address.
    * negative: the whole X-Forwarded-For chain is trusted.

    Lookup order when proxied: X-Forwarded-For, then X-Real-IP, then REMOTE_ADDR.
    Blank values are skipped. Returns "" when no address can be determined.
    """
    meta = request.META
    depth = settings.REVERSE_PROXY_DEPTH

    # Never trust client-controllable headers unless we are behind a proxy.
    if not depth:
        return meta.get("REMOTE_ADDR", "")

    x_forwarded_for = meta.get("HTTP_X_FORWARDED_FOR")
    if x_forwarded_for:
        full_proxy_chain = [s.strip() for s in x_forwarded_for.split(",")]
        # Keep only the trailing `depth` entries (those appended by our own
        # proxies); anything further left could have been spoofed by the client.
        # A chain shorter than `depth` is kept whole by the slice.
        trusted = full_proxy_chain[-depth:] if depth > 0 else full_proxy_chain
        # str.split always yields at least one item, so test the entry itself:
        # a blank entry (malformed header) must not be reported as the client IP.
        if trusted[0]:
            return trusted[0]

    x_real_ip = (meta.get("HTTP_X_REAL_IP") or "").strip()
    if x_real_ip:
        return x_real_ip

    return meta.get("REMOTE_ADDR", "")
37 changes: 4 additions & 33 deletions src/indicatorsets/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@

import requests
from django.conf import settings
from django.http import JsonResponse
from django.core.cache import cache
from django.http import JsonResponse
from epiportal.utils import get_client_ip
from epiweeks import Week
from delphi_utils import get_structured_logger

Expand Down Expand Up @@ -681,36 +682,6 @@ def generate_query_code_flusurv(flusurv_geos, start_date, end_date):
return python_code_blocks, r_code_blocks


def get_real_ip_addr(req):
    """Return the client IP address for ``req``, honoring reverse-proxy headers.

    ``req`` must expose a Django-style ``META`` mapping (the function reads
    ``HTTP_X_FORWARDED_FOR``, ``HTTP_X_REAL_IP`` and ``REMOTE_ADDR`` from it).

    ``settings.REVERSE_PROXY_DEPTH`` controls how much of the forwarding
    headers is trusted:

    * ``0``: not proxied (server is directly reachable); forwarding headers
      are ignored and ``REMOTE_ADDR`` is used.
    * ``N > 0``: at most ``N`` proxies sit between this server and the outside
      world, so only the last ``N`` entries of ``X-Forwarded-For`` are trusted.
    * negative: trust the whole chain -- not generally recommended unless the
      most-external proxy discards ``X-Forwarded-For`` sent from outside.

    Returns:
        The client address with surrounding whitespace removed, or ``""``
        when no address can be determined.
    """
    meta = req.META

    # ONLY trust the forwarding headers when we are actually reverse proxied.
    if settings.REVERSE_PROXY_DEPTH:
        x_forwarded_for = meta.get("HTTP_X_FORWARDED_FOR")
        if x_forwarded_for:
            full_proxy_chain = x_forwarded_for.split(",")
            # Drop extra addresses at the front of the list, as they could be
            # spoofed. A negative depth maps to 0, and ``chain[-0:]`` is the
            # whole chain, so nothing is stripped in that special case.
            depth = max(settings.REVERSE_PROXY_DEPTH, 0)
            trusted_proxy_chain = full_proxy_chain[-depth:]
            # The first (or only) address in the trusted part of the chain is
            # the actual remote address; skip it when blank (malformed header).
            client_ip = trusted_proxy_chain[0].strip()
            if client_ip:
                return client_ip

        # Fall back to "X-Real-Ip" if "X-Forwarded-For" is not usable.
        x_real_ip = (meta.get("HTTP_X_REAL_IP") or "").strip()
        if x_real_ip:
            return x_real_ip

    # Not proxied (or proxied but the headers were missing): use the socket
    # peer address, defaulting to "" rather than None so callers always get a
    # string to log.
    return meta.get("REMOTE_ADDR", "")


def log_form_stats(request, data, form_mode):
log_data = {
"form_mode": form_mode,
Expand All @@ -729,7 +700,7 @@ def log_form_stats(request, data, form_mode):
),
"api_key_used": bool(data.get("api_key")),
"api_key": data.get("api_key", "Not provided"),
"user_ip": get_real_ip_addr(request),
"user_ip": get_client_ip(request),
"user_ga_id": data.get("clientId", "Not available"),
}

Expand Down Expand Up @@ -805,7 +776,7 @@ def log_form_data(request, data, form_mode):
"epiweeks": get_epiweek(data.get("start_date", ""), data.get("end_date", "")) if data.get("start_date") and data.get("end_date") else [], # fmt: skip
"api_key_used": bool(data.get("apiKey")),
"api_key": data.get("apiKey", "Not provided"),
"user_ip": get_real_ip_addr(request),
"user_ip": get_client_ip(request),
"user_ga_id": data.get("clientId", "Not available"),
}
form_data_logger.info("form_data", **log_data)
Expand Down
Loading