
Commit 68fa0e9

add LLM-as-a-Judge detections via vllm-judge
1 parent e41a429 commit 68fa0e9

File tree

detectors/llm_judge/README.md
detectors/llm_judge/app.py
detectors/llm_judge/detector.py
detectors/llm_judge/requirements.txt
detectors/llm_judge/scheme.py

5 files changed: +263 −0 lines


detectors/llm_judge/README.md

Whitespace-only changes.

detectors/llm_judge/app.py

Lines changed: 73 additions & 0 deletions
@@ -0,0 +1,73 @@
import os
import sys
from contextlib import asynccontextmanager
from typing import Annotated, Dict

from fastapi import Header
from prometheus_fastapi_instrumentator import Instrumentator
sys.path.insert(0, os.path.abspath(".."))

from common.app import DetectorBaseAPI as FastAPI
from detector import LLMJudgeDetector
from scheme import (
    ContentAnalysisHttpRequest,
    ContentsAnalysisResponse,
    MetricsListResponse,
    Error,
)

detector_objects: Dict[str, LLMJudgeDetector] = {}


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan management."""
    try:
        detector_objects["detector"] = LLMJudgeDetector()
        yield
    finally:
        # Clean up resources
        if "detector" in detector_objects:
            await detector_objects["detector"].close()
        detector_objects.clear()


app = FastAPI(lifespan=lifespan, dependencies=[])
Instrumentator().instrument(app).expose(app)


@app.post(
    "/api/v1/text/contents",
    response_model=ContentsAnalysisResponse,
    description="""LLM-as-Judge detector that evaluates content using various metrics like safety, toxicity, accuracy, helpfulness, etc. \
The metric parameter allows you to specify which evaluation criteria to use. \
Supports all built-in vllm_judge metrics including safety, accuracy, helpfulness, clarity, and many more.""",
    responses={
        404: {"model": Error, "description": "Resource Not Found"},
        422: {"model": Error, "description": "Validation Error"},
    },
)
async def detector_unary_handler(
    request: ContentAnalysisHttpRequest,
    detector_id: Annotated[str, Header(example="llm_judge_safety")],
):
    """Analyze content using LLM-as-Judge evaluation."""
    return ContentsAnalysisResponse(root=await detector_objects["detector"].run(request))


@app.get(
    "/api/v1/metrics",
    response_model=MetricsListResponse,
    description="List all available metrics for LLM Judge evaluation",
    responses={
        404: {"model": Error, "description": "Resource Not Found"},
    },
)
async def list_metrics():
    """List all available evaluation metrics."""
    detector = detector_objects.get("detector")
    if not detector:
        return {"metrics": [], "total": 0}

    metrics = detector.list_available_metrics()
    return MetricsListResponse(metrics=metrics, total=len(metrics))
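
Not part of this commit, but for orientation: a minimal client sketch against the two routes above. The host and port (http://localhost:8000), the detector-id value, and the timeouts are assumptions, not defined by this commit; the example text reuses the one from scheme.py.

import requests

BASE = "http://localhost:8000"  # assumed; serve app.py however your deployment does

# List the metrics the judge exposes.
metrics = requests.get(f"{BASE}/api/v1/metrics", timeout=30).json()
print(metrics["total"], "metrics available, e.g.:", metrics["metrics"][:5])

# Request a safety evaluation of one text. FastAPI maps the detector_id
# Header() parameter to the "detector-id" HTTP header.
resp = requests.post(
    f"{BASE}/api/v1/text/contents",
    headers={"detector-id": "llm_judge_safety"},
    json={
        "contents": [
            "Martians are like crocodiles; the more you give them meat, the more they want"
        ],
        "detector_params": {"metric": "safety"},
    },
    timeout=60,
)
resp.raise_for_status()

# The body mirrors ContentsAnalysisResponse: one list of analyses per input text.
for analyses in resp.json():
    for analysis in analyses:
        print(analysis["detection"], analysis["score"], analysis["metadata"].get("reasoning"))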

detectors/llm_judge/detector.py

Lines changed: 115 additions & 0 deletions
@@ -0,0 +1,115 @@
import os
import sys
from typing import List, Dict, Any

sys.path.insert(0, os.path.abspath(".."))

from vllm_judge import Judge, EvaluationResult, BUILTIN_METRICS
from vllm_judge.exceptions import MetricNotFoundError
from common.app import logger
from scheme import (
    ContentAnalysisHttpRequest,
    ContentAnalysisResponse,
    ContentsAnalysisResponse,
)


class LLMJudgeDetector:
    """LLM-as-Judge detector for evaluating content using vllm_judge."""

    def __init__(self) -> None:
        """Initialize the LLM Judge Detector."""
        self.judge = None
        self.available_metrics = set(BUILTIN_METRICS.keys())

        # Get configuration from environment
        self.vllm_base_url = os.environ.get("VLLM_BASE_URL")
        self.vllm_model = os.environ.get("VLLM_MODEL")

        if not self.vllm_base_url:
            raise ValueError("VLLM_BASE_URL environment variable is required")

        logger.info(f"Initializing LLM Judge with URL: {self.vllm_base_url}")

        self._initialize_judge()

    def _initialize_judge(self) -> None:
        """Initialize the vLLM Judge."""
        try:
            self.judge = Judge.from_url(base_url=self.vllm_base_url)
            logger.info(f"LLM Judge initialized successfully with model: {self.judge.config.model} and base url: {self.judge.config.base_url}")
            logger.info(f"Available metrics: {', '.join(sorted(self.available_metrics))}")

        except Exception as e:
            logger.error(f"Failed to initialize LLM Judge: {e}")
            raise

    async def evaluate_single_content(self, content: str, params: Dict[str, Any]) -> ContentAnalysisResponse:
        """
        Evaluate a single piece of content using the specified metric.

        Args:
            content: Text content to evaluate
            params: vLLM Judge parameters for the evaluation

        Returns:
            ContentAnalysisResponse with evaluation results
        """
        if "metric" not in params and "criteria" not in params:
            params["metric"] = "safety"  # Default to safety

        evaluation_params = {
            "content": content,
            **params
        }

        # Perform evaluation
        result: EvaluationResult = await self.judge.evaluate(
            **evaluation_params
        )

        # Convert to response format
        score = None
        if isinstance(result.decision, (int, float)) or result.score is not None:
            # Numeric result
            score = float(result.score if result.score is not None else result.decision)

        return ContentAnalysisResponse(
            start=0,
            end=len(content),
            detection=str(result.decision),
            detection_type="llm_judge",
            score=score,
            text=content,
            evidences=[],
            metadata={"reasoning": result.reasoning}
        )

    async def run(self, request: ContentAnalysisHttpRequest) -> ContentsAnalysisResponse:
        """
        Run content analysis for each input text.

        Args:
            request: Input request containing texts and metric to analyze

        Returns:
            ContentsAnalysisResponse: The aggregated response for all input texts
        """

        contents_analyses = []

        for content in request.contents:
            analysis = await self.evaluate_single_content(content, request.detector_params)
            contents_analyses.append([analysis])  # Wrap in list to match schema

        return contents_analyses

    async def close(self):
        """Close the judge client."""
        if self.judge:
            await self.judge.close()

    def list_available_metrics(self) -> List[str]:
        """Return list of available metrics."""
        return sorted(list(self.available_metrics))
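
Not part of this commit: a small driver sketch that exercises LLMJudgeDetector without the FastAPI layer. It assumes it is run from detectors/llm_judge (so the relative imports and the sys.path tweak resolve) and that a vLLM OpenAI-compatible server is reachable at the placeholder URL; the input text and metric are illustrative.

import asyncio
import os

# Assumed endpoint; point this at a real vLLM server before running.
os.environ.setdefault("VLLM_BASE_URL", "http://localhost:8080/v1")

from detector import LLMJudgeDetector
from scheme import ContentAnalysisHttpRequest


async def main():
    detector = LLMJudgeDetector()  # reads VLLM_BASE_URL and builds the Judge client
    try:
        request = ContentAnalysisHttpRequest(
            contents=["The capital of France is Berlin."],
            detector_params={"metric": "accuracy"},
        )
        analyses = await detector.run(request)
        for analysis_group in analyses:  # one single-element list per input text
            for analysis in analysis_group:
                print(analysis.detection, analysis.score, analysis.metadata.get("reasoning"))
    finally:
        await detector.close()


asyncio.run(main())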

detectors/llm_judge/requirements.txt

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
vllm-judge>=0.1.5
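
After installing this requirement, a quick sanity check (not part of the commit) is to confirm that the built-in metric registry detector.py imports is present:

# Verify that the installed vllm-judge exposes the built-in metrics used above.
from vllm_judge import BUILTIN_METRICS

print(len(BUILTIN_METRICS), "built-in metrics, e.g.:", sorted(BUILTIN_METRICS)[:5])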

detectors/llm_judge/scheme.py

Lines changed: 74 additions & 0 deletions
@@ -0,0 +1,74 @@
from enum import Enum
from typing import List, Optional, Dict, Any
from pydantic import BaseModel, Field, RootModel


class Evidence(BaseModel):
    source: str = Field(
        title="Source",
        example="https://en.wikipedia.org/wiki/IBM",
        description="Source of the evidence, it can be url of the evidence etc",
    )


class EvidenceType(str, Enum):
    url = "url"
    title = "title"


class EvidenceObj(BaseModel):
    type: EvidenceType = Field(
        title="EvidenceType",
        example="url",
        description="Type field signifying the type of evidence provided. Example url, title etc",
    )
    evidence: Evidence = Field(
        description="Evidence object, currently only containing source, but in future can contain other optional arguments like id, etc",
    )


class ContentAnalysisHttpRequest(BaseModel):
    contents: List[str] = Field(
        min_length=1,
        title="Contents",
        description="Field allowing users to provide list of texts for analysis. Note, results of this endpoint will contain analysis / detection of each of the provided text in the order they are present in the contents object.",
        example=[
            "Martians are like crocodiles; the more you give them meat, the more they want"
        ],
    )
    detector_params: Optional[Dict[str, Any]] = Field(
        default_factory=dict,
        description="Detector parameters for evaluation (e.g., metric, criteria, etc.)",
        example={"metric": "safety"}
    )


class ContentAnalysisResponse(BaseModel):
    start: int = Field(example=0)
    end: int = Field(example=75)
    text: str = Field(example="This is a safe and helpful response")
    detection: str = Field(example="vllm_model")
    detection_type: str = Field(example="llm_judge")
    score: float = Field(example=0.8)
    evidences: Optional[List[EvidenceObj]] = Field(
        description="Optional field providing evidences for the provided detection",
        default=[],
    )
    metadata: Optional[Dict[str, Any]] = Field(default_factory=dict, description="Additional metadata from evaluation")


class ContentsAnalysisResponse(RootModel):
    root: List[List[ContentAnalysisResponse]] = Field(
        title="Response Text Content Analysis LLM Judge"
    )


class Error(BaseModel):
    code: int
    message: str


class MetricsListResponse(BaseModel):
    """Response for listing available metrics."""
    metrics: List[str] = Field(description="List of available metric names")
    total: int = Field(description="Total number of available metrics")
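
Not part of this commit: a sketch of the request/response shapes these models imply. All field values are illustrative, and the import assumes the snippet is run next to scheme.py.

from scheme import (
    ContentAnalysisHttpRequest,
    ContentAnalysisResponse,
    ContentsAnalysisResponse,
)

text = "Martians are like crocodiles; the more you give them meat, the more they want"

request = ContentAnalysisHttpRequest(
    contents=[text],
    detector_params={"metric": "safety"},
)

response = ContentsAnalysisResponse(
    root=[[  # outer list: one entry per input text; inner list: analyses for that text
        ContentAnalysisResponse(
            start=0,
            end=len(text),
            text=text,
            detection="SAFE",          # str(result.decision) from the judge; value illustrative
            detection_type="llm_judge",
            score=0.9,                 # illustrative
            metadata={"reasoning": "No unsafe content detected."},
        )
    ]]
)

print(request.model_dump_json(indent=2))
print(response.model_dump_json(indent=2))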
