@@ -1,11 +1,13 @@
+import json
 import os
 import warnings
 from abc import ABC, abstractmethod
 from typing import Dict
 
 from dotenv import load_dotenv
 from openai import OpenAI
-from tenacity import retry, stop_after_attempt, wait_random_exponential # for exponential backoff
+from tenacity import stop_after_attempt # for exponential backoff
+from tenacity import retry, wait_random_exponential
 
 load_dotenv()
 
@@ -39,7 +41,11 @@
 
 class LLMInterface(ABC):
     @abstractmethod
-    def run(self, prompt: Dict[str, str], temperature: float = 0, max_tokens: int = 1024) -> str:
+    def run(self, prompt: Dict[str, str], temperature: float = 0) -> str:
+        pass
+
+    @abstractmethod
+    def json(self, prompt: Dict[str, str], temperature: float = 0) -> str:
         pass
 
 
@@ -113,11 +119,11 @@ def __init__(self, model):
             raise ValueError(
                 f"Model {model} is not supported. "
                 "Please choose from one of the following LLM providers: "
-                "OpenAI gpt models, Anthropic claude models, Google Gemini models, Azure OpenAI deployment, Cohere models, AWS Bedrock, and VLLM model endpoints."
+                "OpenAI gpt models (e.g. gpt-4o-mini, gpt-4o), Anthropic claude models (e.g. claude-3.5-sonnet), Google Gemini models (e.g. gemini-pro), Azure OpenAI deployment (azure)"
             )
 
-    @retry(wait=wait_random_exponential(min=1, max=90), stop=stop_after_attempt(15))
-    def _llm_response(self, prompt, temperature, max_tokens):
+    @retry(wait=wait_random_exponential(min=1, max=90), stop=stop_after_attempt(50))
+    def _llm_response(self, prompt, temperature, max_tokens=1024):
         """
         Send a prompt to the LLM and return the response.
         """
@@ -163,7 +169,7 @@ def _llm_response(self, prompt, temperature, max_tokens):
             content = response.completion
         elif COHERE_AVAILABLE and isinstance(self.client, CohereClient):
             prompt = f"{prompt['system_prompt']}\n{prompt['user_prompt']}"
-            response = self.client.generate(model="command", prompt=prompt, temperature=temperature, max_tokens=max_tokens) # type: ignore
+            response = self.client.generate(model="command", prompt=prompt, temperature=temperature, max_tokens=1024) # type: ignore
             try:
                 content = response.generations[0].text
             except:
@@ -222,10 +228,18 @@ def run(self, prompt, temperature=0, max_tokens=1024):
         """
         Run the LLM and return the response.
         Default temperature: 0
-        Default max_tokens: 1024
         """
        content = self._llm_response(prompt=prompt, temperature=temperature, max_tokens=max_tokens)
         return content
 
+    def json(self, prompt, temperature=0, max_tokens=1024, **kwargs):
+        llm_output = self.run(prompt, temperature, max_tokens=max_tokens)
+        if "{" in llm_output:
+            first_bracket = llm_output.index("{")
+            json_output = llm_output[first_bracket:].strip("```").strip(" ")
+        else:
+            json_output = llm_output.strip("```").strip(" ").replace("json", "")
+        return json.loads(json_output)
+
 
-DefaultLLM = lambda: LLMFactory(model=os.getenv("EVAL_LLM", "gpt-3.5-turbo-0125"))
+DefaultLLM = lambda: LLMFactory(model=os.getenv("EVAL_LLM", "gpt-4o-mini"))
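
For reference, a minimal usage sketch of the interface added in this diff, assuming the prompt dict uses the `system_prompt` / `user_prompt` keys that `_llm_response` reads; the prompt contents below are made up for illustration.

```python
# Hypothetical usage sketch; class and method names come from this diff,
# the prompt text is illustrative only.
llm = DefaultLLM()  # resolves to EVAL_LLM if set, otherwise "gpt-4o-mini"

prompt = {
    "system_prompt": "You are a strict grader. Respond with JSON only.",
    "user_prompt": 'Grade the answer "Paris" to the question "What is the capital of France?"',
}

raw = llm.run(prompt, temperature=0)      # raw string completion
parsed = llm.json(prompt, temperature=0)  # Python object parsed from the model's JSON output
```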
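The new `json()` method trims Markdown code fences from the completion before parsing it. A standalone sketch of that parsing step, assuming the model wraps its JSON in a fenced ```json block; `extract_json` is a hypothetical name used only for this illustration.

```python
import json

def extract_json(llm_output: str):
    # Mirrors the parsing in LLMFactory.json(): cut everything before the first
    # "{", strip surrounding backticks, then hand the remainder to json.loads.
    if "{" in llm_output:
        first_bracket = llm_output.index("{")
        json_output = llm_output[first_bracket:].strip("```").strip(" ")
    else:
        json_output = llm_output.strip("```").strip(" ").replace("json", "")
    return json.loads(json_output)

print(extract_json('```json\n{"score": 5, "reason": "exact match"}\n```'))
# {'score': 5, 'reason': 'exact match'}
```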