Skip to content

Commit 2398fc4

Browse files
homanp and cursoragent authored
feat: fallback url when hitting cold starts (#1113)
* feat: add cold start fallback for superagent models

  Add automatic fallback to always-on endpoint when primary Cloud Run endpoints experience cold starts.

  - Add timeout-based fallback (default 5s) using AbortController (TS) / asyncio (Python)
  - Add DEFAULT_FALLBACK_URL constant pointing to https://superagent.sh/api/fallback
  - Add getFallbackUrl() helper with priority: client option > env var > default
  - Add FallbackOptions interface/dataclass for configuration
  - Add enableFallback, fallbackTimeoutMs/fallback_timeout, fallbackUrl options to ClientConfig
  - Support SUPERAGENT_FALLBACK_URL environment variable override
  - Add unit tests for fallback configuration and logic
  - Bump version to 0.1.4

  Co-authored-by: Cursor <[email protected]>

* chore: bump cli and mcp to use safety-agent v0.1.4

  - cli: 0.1.3 → 0.1.4
  - mcp: 0.1.2 → 0.1.3
  - Update safety-agent dependency to ^0.1.4

  Co-authored-by: Cursor <[email protected]>

---------

Co-authored-by: Cursor <[email protected]>
1 parent 8bbb687 commit 2398fc4

File tree

16 files changed: +618 −54 lines changed

cli/package-lock.json

Lines changed: 6 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

cli/package.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "safety-agent-cli",
3-
"version": "0.1.3",
3+
"version": "0.1.4",
44
"description": "CLI for Superagent - validate prompts and tool calls for security",
55
"type": "module",
66
"main": "./dist/index.js",
@@ -42,6 +42,6 @@
4242
"node": ">=18"
4343
},
4444
"dependencies": {
45-
"safety-agent": "^0.1.3"
45+
"safety-agent": "^0.1.4"
4646
}
4747
}

mcp/package-lock.json

Lines changed: 6 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

mcp/package.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "safety-agent-mcp",
3-
"version": "0.1.2",
3+
"version": "0.1.3",
44
"description": "MCP server for Superagent.sh API integration - security guardrails, PII redaction, and claim verification",
55
"type": "module",
66
"main": "dist/index.js",
@@ -33,7 +33,7 @@
3333
"license": "MIT",
3434
"dependencies": {
3535
"@modelcontextprotocol/sdk": "^1.6.1",
36-
"safety-agent": "^0.1.3",
36+
"safety-agent": "^0.1.4",
3737
"zod": "^3.23.8"
3838
},
3939
"devDependencies": {

sdk/python/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "safety-agent"
3-
version = "0.1.3"
3+
version = "0.1.4"
44
description = "A lightweight Python guardrail SDK for content safety"
55
readme = "README.md"
66
license = "MIT"

sdk/python/src/safety_agent/client.py

Lines changed: 37 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
ProcessedInput,
2828
MultimodalContentPart,
2929
)
30-
from .providers import call_provider, parse_model, DEFAULT_GUARD_MODEL
30+
from .providers import call_provider, parse_model, DEFAULT_GUARD_MODEL, FallbackOptions
3131
from .prompts.guard import build_guard_user_message, build_guard_system_prompt
3232
from .prompts.redact import build_redact_system_prompt, build_redact_user_message
3333
from .schemas import GUARD_RESPONSE_FORMAT, REDACT_RESPONSE_FORMAT
@@ -214,6 +214,11 @@ def __init__(self, config: ClientConfig | None = None):
214214
)
215215

216216
self._api_key = api_key
217+
self._fallback_options = FallbackOptions(
218+
enable_fallback=config.enable_fallback if config else None,
219+
fallback_timeout=config.fallback_timeout if config else None,
220+
fallback_url=config.fallback_url if config else None,
221+
)
217222

218223
def _post_usage(self, usage: TokenUsage) -> None:
219224
"""Post usage metrics to Superagent dashboard (fire and forget)."""
@@ -258,7 +263,9 @@ async def _guard_single_text(
258263
response_format = (
259264
GUARD_RESPONSE_FORMAT if _supports_structured_output(model) else None
260265
)
261-
response = await call_provider(model, messages, response_format)
266+
response = await call_provider(
267+
model, messages, response_format, self._fallback_options
268+
)
262269
content = response.choices[0].message.content
263270

264271
if not content:
@@ -314,7 +321,9 @@ async def _guard_image(
314321
response_format = (
315322
GUARD_RESPONSE_FORMAT if _supports_structured_output(model) else None
316323
)
317-
response = await call_provider(model, messages, response_format)
324+
response = await call_provider(
325+
model, messages, response_format, self._fallback_options
326+
)
318327
content = response.choices[0].message.content
319328

320329
if not content:
@@ -498,7 +507,9 @@ async def redact(
498507
response_format = (
499508
REDACT_RESPONSE_FORMAT if _supports_structured_output(model) else None
500509
)
501-
response = await call_provider(model, messages, response_format)
510+
response = await call_provider(
511+
model, messages, response_format, self._fallback_options
512+
)
502513
content = response.choices[0].message.content
503514

504515
if not content:
@@ -695,17 +706,37 @@ def create_client(
695706
api_key: str | None = None,
696707
*,
697708
config: ClientConfig | None = None,
709+
enable_fallback: bool | None = None,
710+
fallback_timeout: float | None = None,
711+
fallback_url: str | None = None,
698712
) -> SafetyClient:
699713
"""
700714
Create a new Safety Agent client.
701715
702716
Args:
703717
api_key: API key for Superagent usage tracking
704718
config: Optional client configuration
719+
enable_fallback: Enable fallback to always-on endpoint on cold start timeout.
720+
Default: True for superagent provider.
721+
fallback_timeout: Timeout in seconds before falling back. Default: 5.0.
722+
fallback_url: Custom fallback URL. If not provided, uses env var or default.
705723
706724
Returns:
707725
SafetyClient instance
708726
"""
709-
if api_key:
710-
config = ClientConfig(api_key=api_key)
727+
if config is None:
728+
config = ClientConfig(
729+
api_key=api_key,
730+
enable_fallback=enable_fallback,
731+
fallback_timeout=fallback_timeout,
732+
fallback_url=fallback_url,
733+
)
734+
elif api_key:
735+
# Override api_key if provided directly
736+
config = ClientConfig(
737+
api_key=api_key,
738+
enable_fallback=enable_fallback or config.enable_fallback,
739+
fallback_timeout=fallback_timeout or config.fallback_timeout,
740+
fallback_url=fallback_url or config.fallback_url,
741+
)
711742
return SafetyClient(config)

sdk/python/src/safety_agent/providers/__init__.py

Lines changed: 85 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
Provider registry and utilities
33
"""
44

5+
import asyncio
56
import os
7+
from dataclasses import dataclass
68
from typing import Any
79

810
import httpx
@@ -18,7 +20,26 @@
1820
from .openrouter import openrouter_provider
1921
from .openai_compatible import openai_compatible_provider
2022
from .vercel import vercel_provider
21-
from .superagent import superagent_provider
23+
from .superagent import (
24+
superagent_provider,
25+
get_fallback_url,
26+
DEFAULT_FALLBACK_TIMEOUT,
27+
DEFAULT_FALLBACK_URL,
28+
)
29+
30+
31+
@dataclass
32+
class FallbackOptions:
33+
"""Options for fallback behavior on cold starts."""
34+
35+
enable_fallback: bool | None = None
36+
"""Enable fallback to always-on endpoint on timeout. Default: True for superagent provider."""
37+
38+
fallback_timeout: float | None = None
39+
"""Timeout in seconds before falling back. Default: 5.0."""
40+
41+
fallback_url: str | None = None
42+
"""Custom fallback URL. If not provided, uses env var or default."""
2243

2344
# Default model for guard() when no model is specified
2445
DEFAULT_GUARD_MODEL = "superagent/guard-1.7b"
@@ -82,6 +103,7 @@ async def call_provider(
82103
model_string: str,
83104
messages: list[ChatMessage],
84105
response_format: ResponseFormat | None = None,
106+
fallback_options: FallbackOptions | None = None,
85107
) -> AnalysisResponse:
86108
"""Call an LLM provider with the given messages."""
87109
parsed = parse_model(model_string)
@@ -108,7 +130,67 @@ async def call_provider(
108130
payload = json.dumps(request_body)
109131
headers = provider.get_signed_headers(url, "POST", payload, api_key)
110132

111-
# Make request
133+
# Determine if fallback is enabled (default: True for superagent provider)
134+
is_superagent = parsed.provider == "superagent"
135+
fallback_opts = fallback_options or FallbackOptions()
136+
enable_fallback = fallback_opts.enable_fallback if fallback_opts.enable_fallback is not None else is_superagent
137+
fallback_timeout = fallback_opts.fallback_timeout or DEFAULT_FALLBACK_TIMEOUT
138+
fallback_url = get_fallback_url(fallback_opts.fallback_url)
139+
140+
# Check if fallback is enabled and URL is available
141+
fallback_available = (
142+
enable_fallback
143+
and fallback_url
144+
and fallback_url != "FALLBACK_ENDPOINT_PLACEHOLDER"
145+
)
146+
147+
if fallback_available:
148+
# Use timeout-based fallback
149+
try:
150+
async with httpx.AsyncClient() as client:
151+
response = await asyncio.wait_for(
152+
client.post(
153+
url,
154+
headers=headers,
155+
json=request_body,
156+
timeout=60.0,
157+
),
158+
timeout=fallback_timeout,
159+
)
160+
161+
if response.status_code != 200:
162+
raise RuntimeError(
163+
f"Provider API error ({response.status_code}): {response.text}"
164+
)
165+
166+
response_data = response.json()
167+
return provider.transform_response(response_data)
168+
169+
except asyncio.TimeoutError:
170+
# Retry on fallback endpoint
171+
print(
172+
f"Primary endpoint timed out after {fallback_timeout}s, "
173+
f"falling back to always-on endpoint"
174+
)
175+
176+
async with httpx.AsyncClient() as client:
177+
fallback_response = await client.post(
178+
fallback_url,
179+
headers=headers,
180+
json=request_body,
181+
timeout=60.0,
182+
)
183+
184+
if fallback_response.status_code != 200:
185+
raise RuntimeError(
186+
f"Fallback provider API error ({fallback_response.status_code}): "
187+
f"{fallback_response.text}"
188+
)
189+
190+
fallback_data = fallback_response.json()
191+
return provider.transform_response(fallback_data)
192+
193+
# No fallback - standard request
112194
async with httpx.AsyncClient() as client:
113195
response = await client.post(
114196
url,
@@ -133,4 +215,5 @@ async def call_provider(
133215
"parse_model",
134216
"get_provider",
135217
"call_provider",
218+
"FallbackOptions",
136219
]

sdk/python/src/safety_agent/providers/superagent.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
"""
44

55
import json
6+
import os
67
import re
78
from typing import Any
89

@@ -16,6 +17,23 @@
1617
"guard-4b": "https://superagent-guard-medium-408394858807.us-central1.run.app/api/chat",
1718
}
1819

20+
# Default fallback URL for cold start mitigation.
21+
# This always-on endpoint handles requests when the primary endpoint has a cold start.
22+
DEFAULT_FALLBACK_URL = "https://superagent.sh/api/fallback"
23+
24+
# Default timeout in seconds before falling back to the always-on endpoint.
25+
DEFAULT_FALLBACK_TIMEOUT = 5.0
26+
27+
28+
def get_fallback_url(client_option: str | None = None) -> str:
29+
"""
30+
Get the fallback URL based on priority:
31+
1. Client option (highest priority)
32+
2. Environment variable SUPERAGENT_FALLBACK_URL
33+
3. Default constant (lowest priority)
34+
"""
35+
return client_option or os.environ.get("SUPERAGENT_FALLBACK_URL") or DEFAULT_FALLBACK_URL
36+
1937

2038
class SuperagentProvider:
2139
"""Superagent provider configuration using Ollama-style API."""

sdk/python/src/safety_agent/types.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,15 @@ class ClientConfig:
1818
api_key: str | None = None
1919
"""API key for Superagent usage tracking. Defaults to SUPERAGENT_API_KEY env var."""
2020

21+
enable_fallback: bool | None = None
22+
"""Enable fallback to always-on endpoint on cold start timeout. Default: True for superagent provider."""
23+
24+
fallback_timeout: float | None = None
25+
"""Timeout in seconds before falling back to always-on endpoint. Default: 5.0."""
26+
27+
fallback_url: str | None = None
28+
"""Custom fallback URL. If not provided, uses SUPERAGENT_FALLBACK_URL env var or built-in default."""
29+
2130

2231
# =============================================================================
2332
# Model Types

0 commit comments

Comments
 (0)