Custom middleware in hosted LiteLLM is valuable. Here is a pattern for building middleware:

**Middleware Architecture**

```python
from typing import Any, Callable, Dict, Optional
from dataclasses import dataclass, field
import time


@dataclass
class MiddlewareContext:
    request: Dict[str, Any]
    response: Optional[Dict[str, Any]] = None
    # default_factory avoids a shared mutable default and stamps each context
    metadata: Dict[str, Any] = field(default_factory=dict)
    start_time: float = field(default_factory=time.time)


class MiddlewareChain:
    def __init__(self):
        self.pre_request: list[Callable] = []
        self.post_response: list[Callable] = []

    def add_pre_request(self, middleware: Callable):
        self.pre_request.append(middleware)

    def add_post_response(self, middleware: Callable):
        self.post_response.append(middleware)

    async def process_request(self, ctx: MiddlewareContext) -> MiddlewareContext:
        for middleware in self.pre_request:
            ctx = await middleware(ctx)
            if ctx.response:  # short-circuit if a middleware already set a response
                return ctx
        return ctx

    async def process_response(self, ctx: MiddlewareContext) -> MiddlewareContext:
        for middleware in self.post_response:
            ctx = await middleware(ctx)
        return ctx
```

**Example Middleware**

```python
# Rate limiting middleware
async def rate_limit_middleware(ctx: MiddlewareContext) -> MiddlewareContext:
    user_id = ctx.request.get("user")
    if not check_rate_limit(user_id):  # check_rate_limit: your own limiter
        ctx.response = {"error": "Rate limit exceeded"}
    return ctx


# Cost tracking middleware
async def cost_tracking_middleware(ctx: MiddlewareContext) -> MiddlewareContext:
    if ctx.response:
        usage = ctx.response.get("usage", {})
        cost = calculate_cost(ctx.request["model"], usage)  # calculate_cost: your pricing table
        ctx.metadata["cost"] = cost
        await log_cost(ctx.request.get("user"), cost)  # log_cost: your persistence layer
    return ctx


# PII redaction middleware
async def pii_redaction_middleware(ctx: MiddlewareContext) -> MiddlewareContext:
    for msg in ctx.request.get("messages", []):
        msg["content"] = redact_pii(msg["content"])  # redact_pii: your scrubber
    return ctx
# Register middleware
chain = MiddlewareChain()
chain.add_pre_request(rate_limit_middleware)
chain.add_pre_request(pii_redaction_middleware)
chain.add_post_response(cost_tracking_middleware)
```
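To show how the pieces fit together, here is a minimal usage sketch; `call_llm` is a hypothetical stand-in for whatever client actually performs the completion:

```python
# Minimal end-to-end flow through the chain (call_llm is hypothetical)
async def handle_request(request: dict) -> dict:
    ctx = MiddlewareContext(request=request)
    ctx = await chain.process_request(ctx)
    if ctx.response is None:  # nothing short-circuited, so call the model
        ctx.response = await call_llm(ctx.request)
    ctx = await chain.process_response(ctx)
    return ctx.response
```

Note that pre-request middleware runs in registration order, so putting the rate limiter before PII redaction avoids redacting requests that will be rejected anyway.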
**For Hosted LiteLLM**

If self-hosting is not an option, consider:

More on agent patterns: https://github.com/KeepALifeUS/autonomous-agents
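If self-hosting is on the table after all, the OSS LiteLLM proxy has a documented extension point for this kind of logic: custom call hooks on the `CustomLogger` class. A minimal sketch, assuming the hook names match the version you deploy (verify against the docs; `MyProxyHandler` is my own name):

```python
# Sketch of a LiteLLM proxy call hook via the CustomLogger extension point.
# Verify hook names/signatures against your LiteLLM version.
from litellm.integrations.custom_logger import CustomLogger


class MyProxyHandler(CustomLogger):
    async def async_pre_call_hook(self, user_api_key_dict, cache, data, call_type):
        # Runs before the request is forwarded; mutate `data` to modify the request
        data.setdefault("metadata", {})["inspected"] = True
        return data


proxy_handler_instance = MyProxyHandler()
# Registered via the proxy config, e.g.:
#   litellm_settings:
#     callbacks: custom_callbacks.proxy_handler_instance
```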
---
Hi!
While exploring the LiteLLM source code, I found the middleware directory (e.g., https://github.com/BerriAI/litellm/blob/main/litellm/proxy/middleware/prometheus_auth_middleware.py), which handles authentication for specific types of requests, such as Prometheus queries.
Is it possible to develop and register custom middleware instances in the Hosted version of LiteLLM? If so, what is the process for doing this?
Specifically, I’m interested in:

- Whether the hosted version supports custom middleware.
- Steps to develop and integrate custom middleware (e.g., for auth, logging, or request/response modification).
- Any limitations or best practices to consider.
Thanks in advance for your guidance!