From 1f0abf2bd8205a2627c1ac824901adc79746e9bf Mon Sep 17 00:00:00 2001 From: Nick Sovich Date: Fri, 24 Jan 2025 06:25:43 -0500 Subject: [PATCH] Add support for gemini models --- agents.py | 68 ++++++++++++++++++++++++++++++++++-------------- ai_lab_repo.py | 43 +++++++++++++++--------------- inference.py | 68 ++++++++++++++++++++++++++++++++++++++++++------ requirements.txt | 7 +++++ 4 files changed, 138 insertions(+), 48 deletions(-) diff --git a/agents.py b/agents.py index c6fd4cd..7db43e8 100755 --- a/agents.py +++ b/agents.py @@ -145,6 +145,7 @@ def get_score(outlined_plan, latex, reward_model_llm, reviewer_type=None, attemp prompt=( f"Outlined in the following text is the research plan that the machine learning engineer was tasked with building: {outlined_plan}\n\n" f"The following text is the research latex that the model produced: \n{latex}\n\n"), temp=0.0) + print(f"DEBUG: query_model parameters - model_str: {reward_model_llm}, system_prompt: {sys[:50]}..., prompt: {('''f"Outlined in the following text is the research plan that the machine learning engineer was tasked with building: {outlined_plan}\n\nThe following text is the research latex that the model produced: \n{latex}\n\n"''')[:50]}...") review_json = extract_json_between_markers(scoring) overall = int(review_json["Overall"]) / 10 @@ -188,14 +189,21 @@ def __init__(self, model="gpt-4o-mini", notes=None, openai_api_key=None): self.openai_api_key = openai_api_key def inference(self, plan, report): + print("** Reviewers agent") reviewer_1 = "You are a harsh but fair reviewer and expect good experiments that lead to insights for the research topic." 
- review_1 = get_score(outlined_plan=plan, latex=report, reward_model_llm=self.model, reviewer_type=reviewer_1, openai_api_key=self.openai_api_key) + review_1_score, review_1_output, review_1_valid = get_score(outlined_plan=plan, latex=report, reward_model_llm=self.model, reviewer_type=reviewer_1, openai_api_key=self.openai_api_key) + if not review_1_valid: review_1 = f"Review failed due to error: {review_1_output}" + else: review_1 = f"Review Score: {review_1_score}, Full Review: {review_1_output}" reviewer_2 = "You are a harsh and critical but fair reviewer who is looking for an idea that would be impactful in the field." - review_2 = get_score(outlined_plan=plan, latex=report, reward_model_llm=self.model, reviewer_type=reviewer_2, openai_api_key=self.openai_api_key) + review_2_score, review_2_output, review_2_valid = get_score(outlined_plan=plan, latex=report, reward_model_llm=self.model, reviewer_type=reviewer_2, openai_api_key=self.openai_api_key) + if not review_2_valid: review_2 = f"Review failed due to error: {review_2_output}" + else: review_2 = f"Review Score: {review_2_score}, Full Review: {review_2_output}" reviewer_3 = "You are a harsh but fair open-minded reviewer that is looking for novel ideas that have not been proposed before." 
- review_3 = get_score(outlined_plan=plan, latex=report, reward_model_llm=self.model, reviewer_type=reviewer_3, openai_api_key=self.openai_api_key) + review_3_score, review_3_output, review_3_valid = get_score(outlined_plan=plan, latex=report, reward_model_llm=self.model, reviewer_type=reviewer_3, openai_api_key=self.openai_api_key) + if not review_3_valid: review_3 = f"Review failed due to error: {review_3_output}" + else: review_3 = f"Review Score: {review_3_score}, Full Review: {review_3_output}" return f"Reviewer #1:\n{review_1}, \nReviewer #2:\n{review_2}, \nReviewer #3:\n{review_3}" @@ -251,7 +259,26 @@ def inference(self, research_topic, phase, step, feedback="", temp=None): f"Current Step #{step}, Phase: {phase}\n{complete_str}\n" f"[Objective] Your goal is to perform research on the following topic: {research_topic}\n" f"Feedback: {feedback}\nNotes: {notes_str}\nYour previous command was: {self.prev_comm}. Make sure your new output is very different.\nPlease produce a single command below:\n") - model_resp = query_model(model_str=self.model, system_prompt=sys_prompt, prompt=prompt, temp=temp, openai_api_key=self.openai_api_key) + + print(f"DEBUG: BaseAgent.inference - model_str: {self.model}") # Debug print + + if self.model in ["gemini-1.0-pro-latest", "gemini-2.0-flash-thinking-exp-01-21"]: # Gemini models + model_resp = query_model( + model_str=self.model, + system_prompt=sys_prompt, + prompt=prompt, + temp=temp, + google_api_key=self.openai_api_key # Pass google_api_key for Gemini + ) + else: # OpenAI models (o1-mini, etc.) 
+ model_resp = query_model( + model_str=self.model, + system_prompt=sys_prompt, + prompt=prompt, + temp=temp, + openai_api_key=self.openai_api_key # Pass openai_api_key for OpenAI + ) + print("^"*50, phase, "^"*50) model_resp = self.clean_text(model_resp) self.prev_comm = model_resp @@ -301,7 +328,10 @@ def generate_readme(self): prompt = ( f"""History: {history_str}\n{'~' * 10}\n""" f"Please produce the readme below in markdown:\n") - model_resp = query_model(model_str=self.model, system_prompt=sys_prompt, prompt=prompt, openai_api_key=self.openai_api_key) + if self.model in ["gemini-1.0-pro-latest", "gemini-2.0-flash-thinking-exp-01-21"]: # Gemini models + model_resp = query_model(model_str=self.model, system_prompt=sys_prompt, prompt=prompt, google_api_key=self.openai_api_key) + else: # OpenAI models (o1-mini, etc.) + model_resp = query_model(model_str=self.model, system_prompt=sys_prompt, prompt=prompt, openai_api_key=self.openai_api_key) return model_resp.replace("```markdown", "") def context(self, phase): @@ -557,6 +587,7 @@ def role_description(self): class PhDStudentAgent(BaseAgent): def __init__(self, model="gpt4omini", notes=None, max_steps=100, openai_api_key=None): super().__init__(model, notes, max_steps, openai_api_key) + print(f"** PhDStudent agent model: {model}") self.phases = [ "literature review", "plan formulation", @@ -612,14 +643,17 @@ def context(self, phase): else: return "" - def requirements_txt(self): - sys_prompt = f"""You are {self.role_description()} \nTask instructions: Your goal is to integrate all of the knowledge, code, reports, and notes provided to you and generate a requirements.txt for a github repository for all of the code.""" - history_str = "\n".join([_[1] for _ in self.history]) - prompt = ( - f"""History: {history_str}\n{'~' * 10}\n""" - f"Please produce the requirements.txt below in markdown:\n") - model_resp = query_model(model_str=self.model, system_prompt=sys_prompt, prompt=prompt, openai_api_key=self.openai_api_key) 
- return model_resp + def requirements_txt(self): + sys_prompt = f"""You are {self.role_description()} \nTask instructions: Your goal is to integrate all of the knowledge, code, reports, and notes provided to you and generate a requirements.txt for a github repository for all of the code.""" + history_str = "\n".join([_[1] for _ in self.history]) + prompt = ( + f"""History: {history_str}\n{'~' * 10}\n""" + f"Please produce the requirements.txt below in markdown:\n") + if self.model in ["gemini-1.0-pro-latest", "gemini-2.0-flash-thinking-exp-01-21"]: # Gemini models + model_resp = query_model(model_str=self.model, system_prompt=sys_prompt, prompt=prompt, google_api_key=self.openai_api_key) + else: # OpenAI models (o1-mini, etc.) + model_resp = query_model(model_str=self.model, system_prompt=sys_prompt, prompt=prompt, openai_api_key=self.openai_api_key) + return model_resp def example_command(self, phase): if phase not in self.phases: @@ -654,8 +688,7 @@ def command_descriptions(self, phase): elif phase == "results interpretation": return ( "You can produce dialogue using the following command: ```DIALOGUE\ndialogue here\n```\n where 'dialogue here' is the actual dialogue you will send and DIALOGUE is just the word DIALOGUE.\n" - "When performing a command, make sure to include the three ticks (```) at the top and bottom ```COMMAND\ntext\n``` where COMMAND is the specific command you want to run (e.g. DIALOGUE).\n" - ) + "When performing a command, make sure to include the three ticks (```) at the top and bottom ```COMMAND\ntext\n``` where COMMAND is the specific command you want to run (e.g. 
DIALOGUE).\n") #elif phase == "report writing": # return ( # "You can produce dialogue using the following command: ```DIALOGUE\ndialogue here\n```\n where 'dialogue here' is the actual dialogue you will send and DIALOGUE is just the word DIALOGUE.\n" @@ -722,7 +755,4 @@ def add_review(self, review, arx_eng): def format_review(self): return "Provided here is a literature review on this topic:\n" + "\n".join( f"arXiv ID: {_l['arxiv_id']}, Summary: {_l['summary']}" - for _l in self.lit_review) - - - + for _l in self.lit_review) \ No newline at end of file diff --git a/ai_lab_repo.py b/ai_lab_repo.py index dbe9541..fb1b70d 100755 --- a/ai_lab_repo.py +++ b/ai_lab_repo.py @@ -6,6 +6,7 @@ import argparse import pickle +import google.generativeai as genai # Import Gemini library DEFAULT_LLM_BACKBONE = "o1-mini" @@ -24,7 +25,7 @@ def __init__(self, research_topic, openai_api_key, max_steps=100, num_papers_lit self.notes = notes self.max_steps = max_steps self.compile_pdf = compile_pdf - self.openai_api_key = openai_api_key + self.openai_api_key = openai_api_key # Still using openai_api_key name for legacy reasons, but should be considered general API key - used for both OpenAI and Gemini if selected self.research_topic = research_topic self.model_backbone = agent_model_backbone self.num_papers_lit_review = num_papers_lit_review @@ -79,7 +80,7 @@ def __init__(self, research_topic, openai_api_key, max_steps=100, num_papers_lit self.save = True self.verbose = True - self.reviewers = ReviewersAgent(model=self.model_backbone, notes=self.notes, openai_api_key=self.openai_api_key) + self.reviewers = ReviewersAgent(model=self.model_backbone, notes=self.notes, openai_api_key=self.openai_api_key) # still using openai_api_key for agents, will update agent class as well self.phd = PhDStudentAgent(model=self.model_backbone, notes=self.notes, max_steps=self.max_steps, openai_api_key=self.openai_api_key) self.postdoc = PostdocAgent(model=self.model_backbone, notes=self.notes, 
max_steps=self.max_steps, openai_api_key=self.openai_api_key) self.professor = ProfessorAgent(model=self.model_backbone, notes=self.notes, max_steps=self.max_steps, openai_api_key=self.openai_api_key) @@ -541,7 +542,8 @@ def parse_arguments(): parser.add_argument( '--deepseek-api-key', type=str, - help='Provide the DeepSeek API key.' + default=None, # use None, not the string "False": any non-empty string is truthy and would defeat the later "not deepseek_api_key" check + help='Provide the DeepSeek API key. (Not used in this version, for future compatibility)' ) parser.add_argument( @@ -566,7 +568,7 @@ def parse_arguments(): parser.add_argument( '--api-key', type=str, - help='Provide the OpenAI API key.' + help='Provide the API key (OpenAI API key or Google API key if using Gemini models). Will prioritize GOOGLE_API_KEY env variable if both are set and Gemini is selected.' ) parser.add_argument( @@ -580,7 +582,7 @@ def parse_arguments(): '--llm-backend', type=str, default="o1-mini", - help='Backend LLM to use for agents in Agent Laboratory.' + help='Backend LLM to use for agents in Agent Laboratory.
Options: o1-mini, gemini-1.0-pro-latest, gemini-2.0-flash-thinking-exp-01-21' ) parser.add_argument( @@ -638,13 +640,22 @@ def parse_arguments(): api_key = os.getenv('OPENAI_API_KEY') or args.api_key deepseek_api_key = os.getenv('DEEPSEEK_API_KEY') or args.deepseek_api_key + google_api_key = os.getenv('GOOGLE_API_KEY') or args.api_key + if args.api_key is not None and os.getenv('OPENAI_API_KEY') is None: os.environ["OPENAI_API_KEY"] = args.api_key if args.deepseek_api_key is not None and os.getenv('DEEPSEEK_API_KEY') is None: os.environ["DEEPSEEK_API_KEY"] = args.deepseek_api_key + if args.api_key is not None and os.getenv('GOOGLE_API_KEY') is None: # setting google api key as well if openai key is provided as fallback for gemini + os.environ["GOOGLE_API_KEY"] = args.api_key + + if llm_backend in ["gemini-1.0-pro-latest", "gemini-2.0-flash-thinking-exp-01-21"]: # check if any supported Gemini model is selected as backend (both must route here, matching the model lists in agents.py/inference.py) + if not google_api_key: # if gemini is selected, prioritize GOOGLE_API_KEY env variable or --api-key + raise ValueError("API key must be provided via --api-key or the GOOGLE_API_KEY environment variable when using Gemini models.") + genai.configure(api_key=google_api_key) # configure gemini api with google api key + elif not api_key and not deepseek_api_key: # for o1-mini (and potentially other openai models in the future), require openai key + raise ValueError("API key must be provided via --api-key / --deepseek-api-key or the OPENAI_API_KEY / DEEPSEEK_API_KEY environment variable when using OpenAI models.") - if not api_key and not deepseek_api_key: - raise ValueError("API key must be provided via --api-key / -deepseek-api-key or the OPENAI_API_KEY / DEEPSEEK_API_KEY environment variable.") ########################################################## # Research question that the agents are going to explore # @@ -659,13 +670,10 @@ def parse_arguments(): "note": f"You should come up with a plan for TWO experiments."}, {"phases": ["plan formulation", "data preparation", "running experiments"], - "note": "Please use gpt-4o-mini for
your experiments."}, - - {"phases": ["running experiments"], - "note": f'Use the following code to inference gpt-4o-mini: \nfrom openai import OpenAI\nos.environ["OPENAI_API_KEY"] = "{api_key}"\nclient = OpenAI()\ncompletion = client.chat.completions.create(\nmodel="gpt-4o-mini-2024-07-18", messages=messages)\nanswer = completion.choices[0].message.content\n'}, + "note": f"Please use {llm_backend} for your experiments. You have the option to use either 'o1-mini' or 'gemini-1.0-pro-latest' as the backend model."}, # updated note to reflect gemini option - {"phases": ["running experiments"], - "note": f"You have access to only gpt-4o-mini using the OpenAI API, please use the following key {api_key} but do not use too many inferences. Do not use openai.ChatCompletion.create or any openai==0.28 commands. Instead use the provided inference code."}, + # Removed OpenAI specific notes, as now we are using Gemini by default + # If you want to use OpenAI models again, you would need to add similar notes but for OpenAI API {"phases": ["running experiments"], "note": "I would recommend using a small dataset (approximately only 100 data points) to run experiments in order to save time. 
Do not use much more than this unless you have to or are running the final tests."}, @@ -719,8 +727,7 @@ def parse_arguments(): research_topic=research_topic, notes=task_notes_LLM, agent_model_backbone=agent_models, - human_in_loop_flag=human_in_loop, - openai_api_key=api_key, + openai_api_key=api_key, # still using openai_api_key, but it's ok as agents will use it as general key now - for both OpenAI and Gemini if selected compile_pdf=compile_pdf, num_papers_lit_review=num_papers_lit_review, papersolver_max_steps=papersolver_max_steps, @@ -728,9 +735,3 @@ def parse_arguments(): ) lab.perform_research() - - - - - - diff --git a/inference.py b/inference.py index d87ad9e..f22552f 100755 --- a/inference.py +++ b/inference.py @@ -2,6 +2,7 @@ from openai import OpenAI import openai import os, anthropic, json +import google.generativeai as genai TOKENS_IN = dict() TOKENS_OUT = dict() @@ -17,6 +18,8 @@ def curr_cost_est(): "claude-3-5-sonnet": 3.00 / 1000000, "deepseek-chat": 1.00 / 1000000, "o1": 15.00 / 1000000, + "gemini-1.0-pro-latest": 0.00 / 1000000, # Gemini Pro is currently free, adjust if pricing changes + "gemini-2.0-flash-thinking-exp-01-21": 0.00 / 1000000, # Gemini Flash is also currently free, adjust if pricing changes } costmap_out = { "gpt-4o": 10.00/ 1000000, @@ -26,20 +29,33 @@ def curr_cost_est(): "claude-3-5-sonnet": 12.00 / 1000000, "deepseek-chat": 5.00 / 1000000, "o1": 60.00 / 1000000, + "gemini-1.0-pro-latest": 0.00 / 1000000, # Gemini Pro is currently free, adjust if pricing changes + "gemini-2.0-flash-thinking-exp-01-21": 0.00 / 1000000, # Gemini Flash is also currently free, adjust if pricing changes } return sum([costmap_in[_]*TOKENS_IN[_] for _ in TOKENS_IN]) + sum([costmap_out[_]*TOKENS_OUT[_] for _ in TOKENS_OUT]) -def query_model(model_str, prompt, system_prompt, openai_api_key=None, anthropic_api_key=None, tries=5, timeout=5.0, temp=None, print_cost=True, version="1.5"): - preloaded_api = os.getenv('OPENAI_API_KEY') - if openai_api_key is 
None and preloaded_api is not None: - openai_api_key = preloaded_api - if openai_api_key is None and anthropic_api_key is None: - raise Exception("No API key provided in query_model function") +def query_model(model_str, prompt, system_prompt, openai_api_key=None, anthropic_api_key=None, google_api_key=None, tries=5, timeout=5.0, temp=None, print_cost=True, version="1.5"): + preloaded_openai_api = os.getenv('OPENAI_API_KEY') + preloaded_google_api = os.getenv('GOOGLE_API_KEY') # Check for Google API key + + if openai_api_key is None and preloaded_openai_api is not None: + openai_api_key = preloaded_openai_api + if google_api_key is None and preloaded_google_api is not None: # Use preloaded Google API key if available + google_api_key = preloaded_google_api + + if openai_api_key is None and anthropic_api_key is None and google_api_key is None: # Check for Google API key as well + raise Exception("No API key provided in query_model function (OpenAI, Anthropic, or Google required)") + if openai_api_key is not None: openai.api_key = openai_api_key os.environ["OPENAI_API_KEY"] = openai_api_key if anthropic_api_key is not None: os.environ["ANTHROPIC_API_KEY"] = anthropic_api_key + if google_api_key is not None: # Configure Gemini with Google API key + genai.configure(api_key=google_api_key) + os.environ["GOOGLE_API_KEY"] = google_api_key + + for _ in range(tries): try: if model_str == "gpt-4o-mini" or model_str == "gpt4omini" or model_str == "gpt-4omini" or model_str == "gpt4o-mini": @@ -139,7 +155,7 @@ def query_model(model_str, prompt, system_prompt, openai_api_key=None, anthropic {"role": "user", "content": system_prompt + prompt}] if version == "0.28": completion = openai.ChatCompletion.create( - model="o1-2024-12-17", # engine = "deployment_name". + model=f"{model_str}", # engine = "deployment_name". 
messages=messages) else: client = OpenAI() @@ -159,13 +175,46 @@ def query_model(model_str, prompt, system_prompt, openai_api_key=None, anthropic completion = client.chat.completions.create( model="o1-preview", messages=messages) answer = completion.choices[0].message.content + elif model_str == "gemini-1.0-pro-latest": # Gemini Pro + print(f"DEBUG: Gemini Pro - model_str: {model_str}") # DEBUG + print(f"DEBUG: Gemini Pro - system_prompt: {system_prompt}") # DEBUG + print(f"DEBUG: Gemini Pro - prompt: {prompt}") # DEBUG + model = genai.GenerativeModel('gemini-1.0-pro-latest') + print("DEBUG: Gemini Pro - GenerativeModel instantiated") # DEBUG + time.sleep(10) # ADD SLEEP HERE - RATE LIMITING TEST + try: # DEBUG - Add try-except around generate_content + response = model.generate_content([system_prompt, prompt]) + print("DEBUG: Gemini Pro - response generated successfully") # DEBUG + answer = response.text + except Exception as gemini_e: # DEBUG - Catch potential exceptions + print(f"DEBUG: Gemini Pro - Exception during generate_content: {gemini_e}") # DEBUG + raise gemini_e # DEBUG - Re-raise the exception to be caught in the outer loop - if model_str in ["o1-preview", "o1-mini", "claude-3.5-sonnet", "o1"]: + elif model_str == "gemini-2.0-flash-thinking-exp-01-21": # Gemini Flash + print(f"DEBUG: Gemini Flash - model_str: {model_str}") # DEBUG + print(f"DEBUG: Gemini Flash - system_prompt: {system_prompt}") # DEBUG + print(f"DEBUG: Gemini Flash - prompt: {prompt}") # DEBUG + model = genai.GenerativeModel('gemini-2.0-flash-thinking-exp-01-21') + print("DEBUG: Gemini Flash - GenerativeModel instantiated") # DEBUG + time.sleep(10) # ADD SLEEP HERE - RATE LIMITING TEST + try: # DEBUG - Add try-except around generate_content + response = model.generate_content([system_prompt, prompt]) + print("DEBUG: Gemini Flash - response generated successfully") # DEBUG + answer = response.text + except Exception as gemini_e: # DEBUG - Catch potential
exceptions + print(f"DEBUG: Gemini Flash - Exception during generate_content: {gemini_e}") # DEBUG + raise gemini_e # DEBUG - Re-raise the exception to be caught in the outer loop + + + # TODO use model.count_tokens instead for token counting of Gemini models (tiktoken only approximates their tokenizers) + if model_str in ["o1-preview", "o1-mini", "claude-3.5-sonnet", "o1", "gemini-1.0-pro-latest", "gemini-2.0-flash-thinking-exp-01-21"]: encoding = tiktoken.encoding_for_model("gpt-4o") elif model_str in ["deepseek-chat"]: encoding = tiktoken.encoding_for_model("cl100k_base") else: encoding = tiktoken.encoding_for_model(model_str) + + if model_str not in TOKENS_IN: TOKENS_IN[model_str] = 0 TOKENS_OUT[model_str] = 0 @@ -181,4 +230,7 @@ def query_model(model_str, prompt, system_prompt, openai_api_key=None, anthropic raise Exception("Max retries: timeout") +#print(query_model(model_str="o1-mini", prompt="hi", system_prompt="hey")) + + #print(query_model(model_str="o1-mini", prompt="hi", system_prompt="hey")) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index e08992b..234bb15 100755 --- a/requirements.txt +++ b/requirements.txt @@ -33,6 +33,13 @@ frozenlist==1.5.0 fsspec==2024.9.0 gast==0.6.0 google-pasta==0.2.0 +google-ai-generativelanguage==0.6.10 +google-api-core==2.24.0 +google-api-python-client==2.159.0 +google-auth==2.37.0 +google-auth-httplib2==0.2.0 +google-generativeai==0.8.3 +googleapis-common-protos==1.66.0 grpcio==1.68.0 h11==0.14.0 h5py==3.12.1