Skip to content

Commit 2d3fa14

Browse files
authored
Handle Redis Connection Failure (#856)
* Better handle redis failing and working without redis
  Signed-off-by: Gavin Jaeger-Freeborn <[email protected]>
* Remove REDIS_REQUIRED and instead default to crashing on error connecting to redis
  Signed-off-by: Gavin Jaeger-Freeborn <[email protected]>
* Migrate from os._exit(1) to sys.exit(1)
  Signed-off-by: Gavin Jaeger-Freeborn <[email protected]>
* formatting
  Signed-off-by: Gavin Jaeger-Freeborn <[email protected]>
* Extra testing
  Signed-off-by: Gavin Jaeger-Freeborn <[email protected]>
* Implement proper degradation for redis crashing
  Signed-off-by: Gavin Jaeger-Freeborn <[email protected]>
* Refactor Redis error handling in socketio.py
  Signed-off-by: Gavin Jaeger-Freeborn <[email protected]>
  - Removed custom exception classes for Redis connection, configuration, and operation errors to simplify error handling.
  - Updated error handling in `create_socket_manager()` to log failures without specific Redis error classification.
  - Cleaned up imports in test files to reflect the removal of the custom exceptions.
  - Enhanced comments for clarity on exception handling strategy.
* Rename Redis connection validation functions
  Signed-off-by: Gavin Jaeger-Freeborn <[email protected]>
  - Renamed functions for clarity: `_validate_redis_before_manager_creation` to `can_we_reach_redis` and `validate_redis_connection` to `should_we_use_redis`.
* removed single use function
  Signed-off-by: Gavin Jaeger-Freeborn <[email protected]>
* Corrected tests to use safe_emit
  Signed-off-by: Gavin Jaeger-Freeborn <[email protected]>
---------
Signed-off-by: Gavin Jaeger-Freeborn <[email protected]>
1 parent 4263993 commit 2d3fa14

File tree

10 files changed

+712
-99
lines changed

10 files changed

+712
-99
lines changed

docker/docker-compose.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,11 @@ services:
7878
depends_on:
7979
redis:
8080
condition: service_healthy
81+
healthcheck:
82+
test: ["CMD", "curl", "-f", "localhost:5000/health"]
83+
interval: 10s
84+
timeout: 3s
85+
retries: 3
8186

8287
redis:
8388
image: redis:8-alpine

oidc-controller/api/core/config.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,14 @@ class GlobalConfig(BaseSettings):
259259
REDIS_DB: int = int(os.environ.get("REDIS_DB", 0))
260260
USE_REDIS_ADAPTER: bool = strtobool(os.environ.get("USE_REDIS_ADAPTER", False))
261261

262+
# Redis error handling and retry configuration
263+
REDIS_THREAD_MAX_RETRIES: int = int(os.environ.get("REDIS_THREAD_MAX_RETRIES", 5))
264+
REDIS_PUBSUB_MAX_FAILURES: int = int(
265+
os.environ.get("REDIS_PUBSUB_MAX_FAILURES", 10)
266+
)
267+
REDIS_RETRY_BASE_DELAY: int = int(os.environ.get("REDIS_RETRY_BASE_DELAY", 1))
268+
REDIS_RETRY_MAX_DELAY: int = int(os.environ.get("REDIS_RETRY_MAX_DELAY", 60))
269+
262270
model_config = ConfigDict(case_sensitive=True)
263271

264272

oidc-controller/api/main.py

Lines changed: 44 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from pathlib import Path
88

99
import uvicorn
10+
import redis.asyncio as async_redis
1011
from api.core.config import settings
1112
from fastapi import FastAPI
1213
from starlette.requests import Request
@@ -26,7 +27,7 @@
2627
)
2728
from .verificationConfigs.router import router as ver_configs_router
2829
from .clientConfigurations.router import router as client_config_router
29-
from .routers.socketio import sio_app
30+
from .routers.socketio import sio_app, _build_redis_url, _handle_redis_failure
3031
from api.core.oidc.provider import init_provider
3132

3233
logger: structlog.typing.FilteringBoundLogger = structlog.getLogger(__name__)
@@ -105,34 +106,32 @@ async def logging_middleware(request: Request, call_next) -> Response:
105106
try:
106107
response: Response = await call_next(request)
107108
return response
108-
finally:
109+
except Exception as e:
109110
process_time = time.time() - start_time
110-
# If we have a response object, log the details
111-
if "response" in locals():
112-
logger.info(
113-
"processed a request",
114-
status_code=response.status_code,
115-
process_time=process_time,
116-
)
117-
# Otherwise, extract the exception from traceback, log and return a 500 response
118-
else:
119-
logger.info(
120-
"failed to process a request",
121-
status_code=http_status.HTTP_500_INTERNAL_SERVER_ERROR,
122-
process_time=process_time,
123-
)
124-
125-
# Need to explicitly log the traceback
126-
logger.error(traceback.format_exc())
127-
128-
return JSONResponse(
129-
status_code=http_status.HTTP_500_INTERNAL_SERVER_ERROR,
130-
content={
131-
"status": "error",
132-
"message": "Internal Server Error",
133-
"process_time": process_time,
134-
},
135-
)
111+
logger.info(
112+
"failed to process a request",
113+
status_code=http_status.HTTP_500_INTERNAL_SERVER_ERROR,
114+
process_time=process_time,
115+
)
116+
117+
# Need to explicitly log the traceback
118+
logger.error(traceback.format_exc())
119+
120+
return JSONResponse(
121+
status_code=http_status.HTTP_500_INTERNAL_SERVER_ERROR,
122+
content={
123+
"status": "error",
124+
"message": "Internal Server Error",
125+
"process_time": process_time,
126+
},
127+
)
128+
else:
129+
process_time = time.time() - start_time
130+
logger.info(
131+
"processed a request",
132+
status_code=response.status_code,
133+
process_time=process_time,
134+
)
136135

137136

138137
@app.on_event("startup")
@@ -141,6 +140,23 @@ async def on_tenant_startup():
141140
await init_db()
142141
await init_provider(await get_db())
143142

143+
# Check Redis availability if adapter is enabled
144+
if settings.USE_REDIS_ADAPTER:
145+
try:
146+
# Test Redis connectivity during startup
147+
redis_url = _build_redis_url()
148+
redis_client = async_redis.from_url(redis_url)
149+
await redis_client.ping()
150+
await redis_client.close()
151+
logger.info("Redis adapter is available and ready")
152+
except Exception as e:
153+
error_type = _handle_redis_failure("startup Redis check", e)
154+
logger.warning(
155+
f"Redis adapter enabled but unavailable (type: {error_type}) - continuing with degraded Socket.IO functionality"
156+
)
157+
else:
158+
logger.debug("Redis adapter disabled")
159+
144160
logger.info(">>> Starting up app new ...")
145161

146162

oidc-controller/api/routers/acapy_handler.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from ..verificationConfigs.crud import VerificationConfigCRUD
1414

1515
from ..core.config import settings
16-
from ..routers.socketio import sio, get_socket_id_for_pid
16+
from ..routers.socketio import sio, get_socket_id_for_pid, safe_emit
1717

1818
logger: structlog.typing.FilteringBoundLogger = structlog.getLogger(__name__)
1919

@@ -106,7 +106,7 @@ async def _emit_status_to_socket(
106106
pid = str(auth_session.id)
107107
sid = await get_socket_id_for_pid(pid, db)
108108
if sid:
109-
await sio.emit("status", {"status": status}, to=sid)
109+
await safe_emit("status", {"status": status}, to=sid)
110110

111111

112112
async def _parse_webhook_body(request: Request) -> dict[Any, Any]:

oidc-controller/api/routers/oidc.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
from ..db.session import get_db
3232

3333
# Access to the websocket
34-
from ..routers.socketio import get_socket_id_for_pid, sio
34+
from ..routers.socketio import get_socket_id_for_pid, sio, safe_emit
3535

3636
from ..verificationConfigs.crud import VerificationConfigCRUD
3737
from ..verificationConfigs.helpers import VariableSubstitutionError
@@ -80,7 +80,7 @@ async def poll_pres_exch_complete(pid: str, db: Database = Depends(get_db)):
8080
)
8181
# Send message through the websocket.
8282
if sid:
83-
await sio.emit("status", {"status": "expired"}, to=sid)
83+
await safe_emit("status", {"status": "expired"}, to=sid)
8484

8585
return {"proof_status": auth_session.proof_status}
8686

oidc-controller/api/routers/presentation_request.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from ..authSessions.models import AuthSession, AuthSessionState
1010

1111
from ..core.config import settings
12-
from ..routers.socketio import sio, get_socket_id_for_pid
12+
from ..routers.socketio import sio, get_socket_id_for_pid, safe_emit
1313
from ..routers.oidc import gen_deep_link
1414
from ..db.session import get_db
1515

@@ -24,7 +24,7 @@ async def toggle_pending(db, auth_session: AuthSession):
2424
await AuthSessionCRUD(db).patch(auth_session.id, auth_session)
2525
sid = await get_socket_id_for_pid(str(auth_session.id), db)
2626
if sid:
27-
await sio.emit("status", {"status": "pending"}, to=sid)
27+
await safe_emit("status", {"status": "pending"}, to=sid)
2828

2929

3030
@router.get("/url/pres_exch/{pres_exch_id}")

0 commit comments

Comments
 (0)