Fix static type-checking errors

lisadunlap · lisadunlap · commit 0fb15fe9ea76 · 2026-01-16T19:42:08.000Z
diff --git a/stringsight/_public/sync_api.py b/stringsight/_public/sync_api.py
@@ -391,6 +391,11 @@ def explain(
         from ..prompts.expansion.trace_based import expand_task_description
         from ..formatters.traces import format_single_trace_from_row, format_side_by_side_trace_from_row
 
+        if task_description is None:
+            raise ValueError(
+                "task_description must be provided when prompt_expansion=True and use_dynamic_prompts=False."
+            )
+
         if verbose:
             logger.info("[DEPRECATED] Using old prompt_expansion. Consider use_dynamic_prompts instead.")
             logger.info("Expanding task description using example traces...")
diff --git a/stringsight/api.py b/stringsight/api.py
@@ -12,7 +12,7 @@
 
 from __future__ import annotations
 
-from typing import Any, Dict, List, Literal
+from typing import Any, Dict, List, Literal, cast
 import asyncio
 import io
 import os
@@ -561,8 +561,10 @@ async def _run_cluster_job_async(job: ClusterJob, req: ClusterRunRequest):
 
         # Create minimal conversations that match the properties
         conversations: List[ConversationRecord] = []
-        all_models = set()
-        property_keys = {(prop.question_id, prop.model) for prop in properties}
+        all_models: set[str] = set()
+        property_keys: set[tuple[str, str]] = {
+            (prop.question_id, cast(str, prop.model)) for prop in properties
+        }
 
         logger.info(f"Found {len(property_keys)} unique (question_id, model) pairs from {len(properties)} properties")
 
@@ -695,9 +697,11 @@ async def _run_cluster_job_async(job: ClusterJob, req: ClusterRunRequest):
                         meta["winner"] = matching_row["score"]["winner"]
 
                     # Create SxS conversation record
+                    model_a_str = model_a if isinstance(model_a, str) else str(model_a)
+                    model_b_str = model_b if isinstance(model_b, str) else str(model_b)
                     conv = ConversationRecord(
                         question_id=qid,
-                        model=[model_a, model_b],
+                        model=[model_a_str, model_b_str],
                         prompt=matching_row.get("prompt", ""),
                         responses=[matching_row.get("model_a_response", ""), matching_row.get("model_b_response", "")],
                         scores=[score_a, score_b],
diff --git a/stringsight/postprocess/parser.py b/stringsight/postprocess/parser.py
@@ -47,10 +47,12 @@ def __init__(
 
     def _parse_single_property(self, index: int, prop: Property, total_props: int) -> Dict[str, Any]:
         """Parse a single property response. Returns dict with results and errors."""
-        result = {
+        parsed_properties: List[Property] = []
+        errors: List[Dict[str, Any]] = []
+        result: Dict[str, Any] = {
             'index': index,
-            'parsed_properties': [],
-            'errors': [],
+            'parsed_properties': parsed_properties,
+            'errors': errors,
             'parse_failed': False,
             'empty_response': False
         }
@@ -63,7 +65,7 @@ def _parse_single_property(self, index: int, prop: Property, total_props: int) -
         if parsed_json is None:
             result['parse_failed'] = True
             error_details = self._analyze_json_parsing_error(prop.raw_response)
-            result['errors'].append({
+            errors.append({
                 'property_id': prop.id,
                 'question_id': prop.question_id,
                 'model': prop.model,
@@ -84,7 +86,7 @@ def _parse_single_property(self, index: int, prop: Property, total_props: int) -
         else:
             result['parse_failed'] = True
             error_details = f"Parsed JSON has unsupported type: {type(parsed_json)}. Expected dict, list, or dict with 'properties' key."
-            result['errors'].append({
+            errors.append({
                 'property_id': prop.id,
                 'question_id': prop.question_id,
                 'model': prop.model,
@@ -98,7 +100,7 @@ def _parse_single_property(self, index: int, prop: Property, total_props: int) -
 
         # Process property dicts
         if not prop_dicts or (isinstance(prop_dicts, list) and len(prop_dicts) == 0):
-            result['errors'].append({
+            errors.append({
                 'property_id': prop.id,
                 'question_id': prop.question_id,
                 'model': prop.model,
@@ -123,7 +125,7 @@ def _parse_single_property(self, index: int, prop: Property, total_props: int) -
                 contains_errors=prop_dict.get("contains_errors"),
                 raw_response=prop.raw_response,
             )
-            result['parsed_properties'].append(new_prop)
+            parsed_properties.append(new_prop)
 
         return result
 
diff --git a/stringsight/prompts/dynamic/discovery_generator.py b/stringsight/prompts/dynamic/discovery_generator.py
@@ -4,10 +4,9 @@
 This module generates custom discovery prompt sections tailored to specific tasks.
 """
 
-import json
 import litellm
 import logging
-from typing import Dict, Any
+from typing import Dict, Any, cast
 from concurrent.futures import ThreadPoolExecutor
 from ...core.caching import UnifiedCache, CacheKeyBuilder
 
@@ -49,10 +48,7 @@ def generate(
         # Check cache
         cached = self.cache.get_completion(cache_key)
         if cached is not None:
-            try:
-                return json.loads(cached)
-            except json.JSONDecodeError:
-                logger.warning("Invalid JSON in cache, regenerating...")
+            return cast(Dict[str, str], cached)
 
         # Generate custom sections in parallel
         try:
@@ -88,7 +84,7 @@ def generate(
         # Keep: json_schema, model_naming_rule, reasoning_suffix from base
 
         # Cache result
-        self.cache.set_completion(cache_key, json.dumps(custom_config))
+        self.cache.set_completion(cache_key, custom_config)
         return custom_config
 
     def _generate_intro_task(
@@ -210,7 +206,7 @@ def _build_cache_key(
         expanded_description: str,
         method: str,
         model: str
-    ) -> str:
+    ) -> CacheKeyBuilder:
         """Build cache key for discovery prompt generation.
 
         Args:
@@ -219,7 +215,7 @@ def _build_cache_key(
             model: LLM model.
 
         Returns:
-            Cache key string.
+            CacheKeyBuilder for use with UnifiedCache.
         """
         from .meta_prompts import (
             INTRO_TASK_GENERATION_TEMPLATE,
@@ -244,5 +240,4 @@ def _build_cache_key(
                 "analysis_process": ANALYSIS_PROCESS_GENERATION_TEMPLATE,
             }).get_key(),
         }
-        builder = CacheKeyBuilder(cache_data)
-        return builder.get_key()
+        return CacheKeyBuilder(cache_data)
diff --git a/stringsight/prompts/dynamic/task_expander.py b/stringsight/prompts/dynamic/task_expander.py
@@ -7,7 +7,7 @@
 
 import random
 import tiktoken
-from typing import List, Dict, Any
+from typing import List, Dict, Any, cast
 from ...core.data_objects import ConversationRecord
 from ...core.caching import UnifiedCache, CacheKeyBuilder
 from ..expansion.trace_based import expand_task_description
@@ -60,7 +60,7 @@ def expand(
         # Check cache
         cached = self.cache.get_completion(cache_key)
         if cached is not None:
-            return cached
+            return cast(str, cached["expanded_task_description"])
 
         # Convert to trace format and truncate
         traces = []
@@ -79,7 +79,7 @@ def expand(
         )
 
         # Cache result
-        self.cache.set_completion(cache_key, expanded)
+        self.cache.set_completion(cache_key, {"expanded_task_description": expanded})
         return expanded
 
     def _sample_conversations(
@@ -218,7 +218,7 @@ def _build_cache_key(
         task_description: str,
         sample_ids: List[str],
         model: str
-    ) -> str:
+    ) -> CacheKeyBuilder:
         """Build cache key for task expansion.
 
         Args:
@@ -227,7 +227,7 @@ def _build_cache_key(
             model: LLM model used for expansion.
 
         Returns:
-            Cache key string.
+            CacheKeyBuilder for use with UnifiedCache.
         """
         cache_data = {
             "type": "task_expansion",
@@ -237,5 +237,4 @@ def _build_cache_key(
             "max_tokens_per_sample": self.max_tokens,
             "version": "1.0",
         }
-        builder = CacheKeyBuilder(cache_data)
-        return builder.get_key()
+        return CacheKeyBuilder(cache_data)
diff --git a/stringsight/routers/prompts.py b/stringsight/routers/prompts.py
@@ -155,6 +155,11 @@ async def generate_prompts_endpoint(req: GeneratePromptsRequest) -> Dict[str, An
         logger.info(f"Prompt generation completed in {generation_time:.2f}s")
 
         # Return metadata
+        if prompts_metadata is None:
+            raise HTTPException(
+                status_code=500,
+                detail="Prompt generation succeeded but returned no metadata."
+            )
         return {
             "prompts": prompts_metadata.dict(),
             "generation_time_seconds": generation_time
diff --git a/stringsight/workers/tasks.py b/stringsight/workers/tasks.py
@@ -1,6 +1,6 @@
 import asyncio
 import logging
-from typing import Dict, Any, List
+from typing import Dict, Any, List, cast
 from datetime import datetime
 from pathlib import Path
 import pandas as pd
@@ -384,8 +384,10 @@ def update_progress(progress: float):
         # Phase 2: Create conversations (10%)
         update_progress(0.10)
         conversations: List[ConversationRecord] = []
-        all_models = set()
-        property_keys = {(prop.question_id, prop.model) for prop in properties}
+        all_models: set[str] = set()
+        property_keys: set[tuple[str, str]] = {
+            (prop.question_id, cast(str, prop.model)) for prop in properties
+        }
         
         for question_id, model in property_keys:
             all_models.add(model)
@@ -477,9 +479,11 @@ def update_progress(progress: float):
                     elif "score" in matching_row and isinstance(matching_row["score"], dict) and "winner" in matching_row["score"]:
                         meta["winner"] = matching_row["score"]["winner"]
                     
+                    model_a_str = model_a if isinstance(model_a, str) else str(model_a)
+                    model_b_str = model_b if isinstance(model_b, str) else str(model_b)
                     conv = ConversationRecord(
                         question_id=qid,
-                        model=[model_a, model_b],
+                        model=[model_a_str, model_b_str],
                         prompt=matching_row.get("prompt", ""),
                         responses=[matching_row.get("model_a_response", ""), matching_row.get("model_b_response", "")],
                         scores=[score_a, score_b],