Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 10 additions & 8 deletions agents.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from utils import *
from tools import *
from inference import *
from llm_client import LLMQueryManager
import json

llm_query_manager = LLMQueryManager()

def extract_json_between_markers(llm_output):
# Regular expression pattern to find JSON content between ```json and ```
Expand Down Expand Up @@ -138,13 +140,13 @@ def get_score(outlined_plan, latex, reward_model_llm, reviewer_type=None, attemp
"You are an AI researcher who is reviewing a paper that was submitted to a prestigious ML venue. "
f"Be critical and cautious in your decision. {reviewer_type}\n"
) + neurips_form
scoring = query_model(
model_str=f"{reward_model_llm}",
scoring = llm_query_manager.query_model(
model_name=f"{reward_model_llm}",
system_prompt=sys,
openai_api_key=openai_api_key,
api_key=openai_api_key,
prompt=(
f"Outlined in the following text is the research plan that the machine learning engineer was tasked with building: {outlined_plan}\n\n"
f"The following text is the research latex that the model produced: \n{latex}\n\n"), temp=0.0)
f"The following text is the research latex that the model produced: \n{latex}\n\n"), temperature=0.0)
review_json = extract_json_between_markers(scoring)

overall = int(review_json["Overall"]) / 10
Expand Down Expand Up @@ -251,7 +253,7 @@ def inference(self, research_topic, phase, step, feedback="", temp=None):
f"Current Step #{step}, Phase: {phase}\n{complete_str}\n"
f"[Objective] Your goal is to perform research on the following topic: {research_topic}\n"
f"Feedback: {feedback}\nNotes: {notes_str}\nYour previous command was: {self.prev_comm}. Make sure your new output is very different.\nPlease produce a single command below:\n")
model_resp = query_model(model_str=self.model, system_prompt=sys_prompt, prompt=prompt, temp=temp, openai_api_key=self.openai_api_key)
model_resp = llm_query_manager.query_model(model_name=self.model, system_prompt=sys_prompt, prompt=prompt, api_key=self.openai_api_key)
print("^"*50, phase, "^"*50)
model_resp = self.clean_text(model_resp)
self.prev_comm = model_resp
Expand Down Expand Up @@ -301,7 +303,7 @@ def generate_readme(self):
prompt = (
f"""History: {history_str}\n{'~' * 10}\n"""
f"Please produce the readme below in markdown:\n")
model_resp = query_model(model_str=self.model, system_prompt=sys_prompt, prompt=prompt, openai_api_key=self.openai_api_key)
model_resp = llm_query_manager.query_model(model_name=self.model, system_prompt=sys_prompt, prompt=prompt, api_key=self.openai_api_key)
return model_resp.replace("```markdown", "")

def context(self, phase):
Expand Down Expand Up @@ -618,7 +620,7 @@ def requirements_txt(self):
prompt = (
f"""History: {history_str}\n{'~' * 10}\n"""
f"Please produce the requirements.txt below in markdown:\n")
model_resp = query_model(model_str=self.model, system_prompt=sys_prompt, prompt=prompt, openai_api_key=self.openai_api_key)
model_resp = llm_query_manager.query_model(model_name=self.model, system_prompt=sys_prompt, prompt=prompt, api_key=self.openai_api_key)
return model_resp

def example_command(self, phase):
Expand Down
4 changes: 4 additions & 0 deletions llm_client/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from .query_manager import LLMQueryManager
from .models import TokenCounter

__all__ = ['LLMQueryManager', 'TokenCounter']
38 changes: 38 additions & 0 deletions llm_client/factory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from typing import Optional, Dict
from .models import (
LLMStrategy,
ModelConfig,
OpenAIStrategy,
AnthropicStrategy,
DeepseekStrategy
)

class LLMStrategyFactory:
    """Builds the provider-specific LLMStrategy for a given model alias.

    Holds the catalogue of supported models (alias -> ModelConfig) plus
    per-provider default API keys used when a call site supplies none.
    """

    # provider name -> strategy class; consulted by create_strategy()
    _STRATEGIES = {
        "openai": OpenAIStrategy,
        "anthropic": AnthropicStrategy,
        "deepseek": DeepseekStrategy,
    }

    def __init__(self, api_keys: Optional[Dict[str, str]] = None):
        # Default keys keyed by provider ("openai", "anthropic", "deepseek").
        self.api_keys = api_keys or {}
        # Supported model aliases and their pricing/provider metadata.
        self.model_configs = {
            "gpt-4o": ModelConfig("gpt-4o-2024-08-06", 2.50, 10.00, "openai"),
            "gpt-4o-mini": ModelConfig("gpt-4o-mini-2024-07-18", 0.15, 0.60, "openai"),
            "claude-3-5-sonnet": ModelConfig("claude-3-5-sonnet-latest", 3.00, 12.00, "anthropic"),
            "deepseek-chat": ModelConfig("deepseek-chat", 1.00, 5.00, "deepseek"),
            "o1-mini": ModelConfig("o1-mini-2024-09-12", 3.00, 12.00, "openai"),
            "o1": ModelConfig("o1-2024-12-17", 15.00, 60.00, "openai"),
            "o1-preview": ModelConfig("o1-preview", 15.00, 60.00, "openai"),
        }

    def create_strategy(self, model_name: str) -> LLMStrategy:
        """Return a ready-to-use strategy for *model_name*.

        Raises:
            ValueError: if the alias is unknown, or its provider has no
                strategy implementation registered.
        """
        try:
            config = self.model_configs[model_name]
        except KeyError:
            raise ValueError(f"Unknown model: {model_name}") from None

        strategy_cls = self._STRATEGIES.get(config.provider)
        if strategy_cls is None:
            raise ValueError(f"No strategy implementation for provider: {config.provider}")
        return strategy_cls(config, self.api_keys.get(config.provider))
130 changes: 130 additions & 0 deletions llm_client/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Optional, List, Dict, Mapping
import tiktoken
from openai import OpenAI
import anthropic
import json
import os
import time


@dataclass
class Message:
    """One chat turn: who produced it and what was said."""

    role: str     # presumably "system"/"user"/"assistant", matching the dicts built in the strategies — confirm
    content: str  # the message text


@dataclass
class ModelConfig:
    """Static metadata for one supported model.

    NOTE(review): the cost fields are named *per_1k*, but TokenCounter
    divides by 1_000_000 — i.e. they are effectively per-million-token
    prices. Confirm which unit is intended.
    """

    model_name: str            # provider-side model identifier sent on the wire
    input_cost_per_1k: float   # input-token price (unit caveat: see class docstring)
    output_cost_per_1k: float  # output-token price (unit caveat: see class docstring)
    provider: str              # "openai", "anthropic" or "deepseek"
    encoding_name: str = "cl100k_base"  # tiktoken encoding used for local token counting


class TokenCounter:
    """Accumulates per-model token usage and converts it to a dollar cost."""

    def __init__(self):
        # model name -> cumulative input / output token counts
        self.tokens_in: Dict[str, int] = {}
        self.tokens_out: Dict[str, int] = {}

    def update_counts(self, model_name: str, input_tokens: int, output_tokens: int):
        """Fold one query's token usage into the running totals for *model_name*."""
        if model_name not in self.tokens_in:
            # Initialise both directions together so they stay in lockstep.
            self.tokens_in[model_name] = self.tokens_out[model_name] = 0
        self.tokens_in[model_name] += input_tokens
        self.tokens_out[model_name] += output_tokens

    def calculate_cost(self, model_configs: Dict[str, "ModelConfig"]) -> float:
        """Total dollar cost for every counted model present in *model_configs*.

        Models without a config entry are silently skipped.
        NOTE(review): divides by 1_000_000 although the config fields are
        named per_1k — the prices are treated as per-million-token rates.
        """
        total = 0.0
        for name, config in model_configs.items():
            if name not in self.tokens_in:
                continue
            total += (self.tokens_in[name] * config.input_cost_per_1k) / 1_000_000
            total += (self.tokens_out[name] * config.output_cost_per_1k) / 1_000_000
        return total


class LLMStrategy(ABC):
    """Base class for provider backends: one query() per provider plus
    shared, local token counting via tiktoken."""

    def __init__(self, config: ModelConfig):
        self.config = config
        # Tokenizer is used only for local usage accounting, not by the API call.
        self.encoding = tiktoken.get_encoding(config.encoding_name)

    @abstractmethod
    def query(self, prompt: str, system_prompt: str, api_key: Optional[str] = None, temperature: Optional[float] = None) -> str:
        """Send *prompt* (with *system_prompt*) to the provider; return the reply text."""

    def count_tokens(self, text: str) -> int:
        """Number of tokens in *text* under this model's configured encoding."""
        encoded = self.encoding.encode(text)
        return len(encoded)


class OpenAIStrategy(LLMStrategy):
    """LLMStrategy backend for OpenAI chat-completion models."""

    def __init__(self, config: ModelConfig, default_api_key: Optional[str] = None):
        super().__init__(config)
        # Provider-level fallback key, used when the caller passes none.
        self.default_api_key = default_api_key

    def query(self, prompt: str, system_prompt: str, api_key: Optional[str] = None, temperature: Optional[float] = None) -> str:
        """Send a system+user chat completion request; return the reply text.

        Raises:
            ValueError: if neither *api_key* nor the default key is set.
        """
        used_key = api_key or self.default_api_key
        if not used_key:
            raise ValueError("No API key provided for OpenAI API")

        client = OpenAI(api_key=used_key)
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt}
        ]

        # Bug fix: only forward temperature when the caller set one. The
        # original passed temperature=None through the SDK (an explicit null),
        # which some endpoints reject — and the factory catalogue includes
        # o1-family models, which do not accept a temperature at all.
        extra_params = {}
        if temperature is not None:
            extra_params["temperature"] = temperature

        completion = client.chat.completions.create(
            model=self.config.model_name,
            messages=messages,
            **extra_params,
        )
        return completion.choices[0].message.content


class AnthropicStrategy(LLMStrategy):
    """LLMStrategy backend for Anthropic Claude models."""

    # The Anthropic Messages API requires max_tokens on every request; the
    # original code omitted it, which makes every call fail at the API level.
    DEFAULT_MAX_TOKENS = 4096

    def __init__(self, config: ModelConfig, default_api_key: Optional[str] = None):
        super().__init__(config)
        # Provider-level fallback key, used when the caller passes none.
        self.default_api_key = default_api_key

    def query(self, prompt: str, system_prompt: str, api_key: Optional[str] = None, temperature: Optional[float] = None) -> str:
        """Send one user message (with a system prompt); return the reply text.

        Raises:
            ValueError: if neither *api_key* nor the default key is set.
        """
        used_key = api_key or self.default_api_key
        if not used_key:
            raise ValueError("No API key provided for Anthropic API")

        client = anthropic.Anthropic(api_key=used_key)
        request = dict(
            model=self.config.model_name,
            system=system_prompt,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=self.DEFAULT_MAX_TOKENS,  # required parameter
        )
        # Forward temperature only when set (the original dropped it entirely).
        if temperature is not None:
            request["temperature"] = temperature

        message = client.messages.create(**request)
        # Read the first content block directly instead of the original
        # json.loads(message.to_json()) round-trip: same text, no detour.
        return message.content[0].text


class DeepseekStrategy(LLMStrategy):
    """LLMStrategy backend for Deepseek's OpenAI-compatible endpoint."""

    def __init__(self, config: ModelConfig, default_api_key: Optional[str] = None):
        super().__init__(config)
        # Provider-level fallback key, used when the caller passes none.
        self.default_api_key = default_api_key

    def query(self, prompt: str, system_prompt: str, api_key: Optional[str] = None, temperature: Optional[float] = None) -> str:
        """Send a system+user chat completion request; return the reply text.

        Raises:
            ValueError: if neither *api_key* nor the default key is set.
        """
        used_key = api_key or self.default_api_key
        if not used_key:
            raise ValueError("No API key provided for Deepseek API")

        # Deepseek speaks the OpenAI wire protocol; only the base_url differs.
        client = OpenAI(
            api_key=used_key,
            base_url="https://api.deepseek.com/v1"
        )
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt}
        ]

        # Bug fix: omit temperature when unset instead of forwarding an
        # explicit null, mirroring the OpenAI strategy.
        extra_params = {}
        if temperature is not None:
            extra_params["temperature"] = temperature

        completion = client.chat.completions.create(
            model=self.config.model_name,
            messages=messages,
            **extra_params,
        )
        return completion.choices[0].message.content
52 changes: 52 additions & 0 deletions llm_client/query_manager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
from typing import Optional, Dict
from .models import TokenCounter
from .factory import LLMStrategyFactory
import time

class LLMQueryManager:
    """Front door for LLM calls: resolves a strategy, retries transient
    failures, and tracks token usage / approximate cost across the run."""

    def __init__(
        self,
        api_keys: Optional[Dict[str, str]] = None,
        token_counter: Optional[TokenCounter] = None
    ):
        self.token_counter = token_counter or TokenCounter()
        self.factory = LLMStrategyFactory(api_keys)
        # Configs of every model queried so far, so the cost printout covers
        # the whole experiment rather than only the current model.
        self._seen_configs = {}

    def query_model(
        self,
        model_name: str,
        prompt: str,
        system_prompt: str,
        api_key: Optional[str] = None,
        max_retries: int = 5,
        timeout: float = 5.0,
        print_cost: bool = True,
        temperature: Optional[float] = None,
    ) -> str:
        """Query *model_name*, retrying up to *max_retries* times.

        Args:
            model_name: alias known to the factory (e.g. "gpt-4o").
            prompt / system_prompt: user and system messages.
            api_key: per-call key; falls back to the factory's provider key.
            max_retries: attempts before giving up; *timeout* seconds between.
            print_cost: print a running approximate-cost line after each call.
            temperature: sampling temperature, forwarded when set.

        Raises:
            ValueError: unknown model, or no API key available.
            Exception: when all retries are exhausted.
        """
        strategy = self.factory.create_strategy(model_name)
        self._seen_configs[model_name] = strategy.config
        for attempt in range(max_retries):
            try:
                answer = strategy.query(prompt, system_prompt, api_key, temperature)

                # Local, approximate accounting (tiktoken on both directions).
                input_tokens = strategy.count_tokens(system_prompt + prompt)
                output_tokens = strategy.count_tokens(answer)
                self.token_counter.update_counts(model_name, input_tokens, output_tokens)
                if print_cost:
                    # Bug fix: price *all* models seen so far. The original
                    # passed only {model_name: strategy.config}, so usage of
                    # every other model was silently dropped from the total.
                    cost = self.token_counter.calculate_cost(self._seen_configs)
                    print(f"Current experiment cost = ${cost:.6f}, ** Approximate values, may not reflect true cost")

                return answer
            except ValueError as e:
                if 'No API key provided for' in str(e):
                    # Chain the original so the provider-specific message survives.
                    raise ValueError("No API key provided. Please provide an API key.") from e
                raise

            except Exception as e:
                if attempt == max_retries - 1:
                    raise Exception(f"Max retries reached: {str(e)}") from e
                print(f"Attempt {attempt + 1} failed: {str(e)}")
                time.sleep(timeout)

        # Unreachable under normal flow (loop either returns or raises),
        # kept as a defensive backstop.
        raise Exception("Max retries: timeout")
Empty file added llm_client/tests/__init__.py
Empty file.
Loading