Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 49 additions & 19 deletions agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ def get_score(outlined_plan, latex, reward_model_llm, reviewer_type=None, attemp
prompt=(
f"Outlined in the following text is the research plan that the machine learning engineer was tasked with building: {outlined_plan}\n\n"
f"The following text is the research latex that the model produced: \n{latex}\n\n"), temp=0.0)
print(f"DEBUG: query_model parameters - model_str: {reward_model_llm}, system_prompt: {sys[:50]}..., prompt: {('''f"Outlined in the following text is the research plan that the machine learning engineer was tasked with building: {outlined_plan}\n\nThe following text is the research latex that the model produced: \n{latex}\n\n"''')[:50]}...")
review_json = extract_json_between_markers(scoring)

overall = int(review_json["Overall"]) / 10
Expand Down Expand Up @@ -188,14 +189,21 @@ def __init__(self, model="gpt-4o-mini", notes=None, openai_api_key=None):
self.openai_api_key = openai_api_key

def inference(self, plan, report):
    """Score a research report from three distinct reviewer personas.

    Each persona is run through ``get_score``; a failed review is reported
    inline in the returned text rather than raised, so one failure does not
    abort the other reviews.

    Args:
        plan: the outlined research plan the report is judged against.
        report: the research latex/report text produced by the model.

    Returns:
        str: the three formatted reviews, joined into "Reviewer #k" sections.
    """
    print("** Reviewers agent")
    # The three calls below previously appeared as three copy-pasted
    # get_score/if/else stanzas differing only in the persona string.
    personas = (
        "You are a harsh but fair reviewer and expect good experiments that lead to insights for the research topic.",
        "You are a harsh and critical but fair reviewer who is looking for an idea that would be impactful in the field.",
        "You are a harsh but fair open-minded reviewer that is looking for novel ideas that have not been proposed before.",
    )
    reviews = []
    for persona in personas:
        score, output, valid = get_score(
            outlined_plan=plan, latex=report, reward_model_llm=self.model,
            reviewer_type=persona, openai_api_key=self.openai_api_key)
        if valid:
            reviews.append(f"Review Score: {score}, Full Review: {output}")
        else:
            reviews.append(f"Review failed due to error: {output}")
    # Reproduces the original "Reviewer #1:\n..., \nReviewer #2:\n..." layout.
    return ", \n".join(
        f"Reviewer #{i}:\n{text}" for i, text in enumerate(reviews, start=1))

Expand Down Expand Up @@ -251,7 +259,26 @@ def inference(self, research_topic, phase, step, feedback="", temp=None):
f"Current Step #{step}, Phase: {phase}\n{complete_str}\n"
f"[Objective] Your goal is to perform research on the following topic: {research_topic}\n"
f"Feedback: {feedback}\nNotes: {notes_str}\nYour previous command was: {self.prev_comm}. Make sure your new output is very different.\nPlease produce a single command below:\n")
model_resp = query_model(model_str=self.model, system_prompt=sys_prompt, prompt=prompt, temp=temp, openai_api_key=self.openai_api_key)

print(f"DEBUG: BaseAgent.inference - model_str: {self.model}") # Debug print

if self.model in ["gemini-1.0-pro-latest", "gemini-2.0-flash-thinking-exp-01-21"]: # Gemini models
model_resp = query_model(
model_str=self.model,
system_prompt=sys_prompt,
prompt=prompt,
temp=temp,
google_api_key=self.openai_api_key # Pass google_api_key for Gemini
)
else: # OpenAI models (o1-mini, etc.)
model_resp = query_model(
model_str=self.model,
system_prompt=sys_prompt,
prompt=prompt,
temp=temp,
openai_api_key=self.openai_api_key # Pass openai_api_key for OpenAI
)

print("^"*50, phase, "^"*50)
model_resp = self.clean_text(model_resp)
self.prev_comm = model_resp
Expand Down Expand Up @@ -301,7 +328,10 @@ def generate_readme(self):
prompt = (
f"""History: {history_str}\n{'~' * 10}\n"""
f"Please produce the readme below in markdown:\n")
model_resp = query_model(model_str=self.model, system_prompt=sys_prompt, prompt=prompt, openai_api_key=self.openai_api_key)
if self.model in ["gemini-1.0-pro-latest", "gemini-2.0-flash-thinking-exp-01-21"]: # Gemini models
model_resp = query_model(model_str=self.model, system_prompt=sys_prompt, prompt=prompt, google_api_key=self.openai_api_key)
else: # OpenAI models (o1-mini, etc.)
model_resp = query_model(model_str=self.model, system_prompt=sys_prompt, prompt=prompt, openai_api_key=self.openai_api_key)
return model_resp.replace("```markdown", "")

def context(self, phase):
Expand Down Expand Up @@ -557,6 +587,7 @@ def role_description(self):
class PhDStudentAgent(BaseAgent):
def __init__(self, model="gpt4omini", notes=None, max_steps=100, openai_api_key=None):
super().__init__(model, notes, max_steps, openai_api_key)
print(f"** PhDStudent agent model: {model}")
self.phases = [
"literature review",
"plan formulation",
Expand Down Expand Up @@ -612,14 +643,17 @@ def context(self, phase):
else:
return ""

# NOTE(review): this appears to be the superseded (pre-change) copy of
# requirements_txt from the diff; an updated definition with Gemini/OpenAI
# key routing appears immediately below it in this paste. This older copy
# always passes the key as openai_api_key.
def requirements_txt(self):
"""Ask the backend LLM to produce a requirements.txt from the agent's history."""
sys_prompt = f"""You are {self.role_description()} \nTask instructions: Your goal is to integrate all of the knowledge, code, reports, and notes provided to you and generate a requirements.txt for a github repository for all of the code."""
# Each history entry looks like a (tag, text) pair; only the text half is
# joined into the prompt — TODO confirm against how self.history is built.
history_str = "\n".join([_[1] for _ in self.history])
prompt = (
f"""History: {history_str}\n{'~' * 10}\n"""
f"Please produce the requirements.txt below in markdown:\n")
# Single LLM call; the response is returned verbatim (fences and all).
model_resp = query_model(model_str=self.model, system_prompt=sys_prompt, prompt=prompt, openai_api_key=self.openai_api_key)
return model_resp
def requirements_txt(self):
    """Ask the backend LLM to produce a requirements.txt from the agent's history.

    Builds a prompt out of the accumulated ``self.history`` and issues a single
    ``query_model`` call, routing the API key under the keyword the selected
    backend expects (Gemini vs. OpenAI-style models).

    Returns:
        str: the raw model response (may include markdown code fences).
    """
    sys_prompt = f"""You are {self.role_description()} \nTask instructions: Your goal is to integrate all of the knowledge, code, reports, and notes provided to you and generate a requirements.txt for a github repository for all of the code."""
    # Each history entry looks like a (tag, text) pair; only the text half is
    # joined into the prompt — TODO confirm against how self.history is built.
    history_str = "\n".join(entry[1] for entry in self.history)
    prompt = (
        f"""History: {history_str}\n{'~' * 10}\n"""
        f"Please produce the requirements.txt below in markdown:\n")
    # The two branches previously duplicated the entire query_model call,
    # differing only in the API-key keyword name; collapse to one call.
    # self.openai_api_key doubles as the general-purpose key for both backends.
    gemini_models = ("gemini-1.0-pro-latest", "gemini-2.0-flash-thinking-exp-01-21")
    key_kwarg = "google_api_key" if self.model in gemini_models else "openai_api_key"
    return query_model(model_str=self.model, system_prompt=sys_prompt,
                       prompt=prompt, **{key_kwarg: self.openai_api_key})

def example_command(self, phase):
if phase not in self.phases:
Expand Down Expand Up @@ -654,8 +688,7 @@ def command_descriptions(self, phase):
elif phase == "results interpretation":
return (
"You can produce dialogue using the following command: ```DIALOGUE\ndialogue here\n```\n where 'dialogue here' is the actual dialogue you will send and DIALOGUE is just the word DIALOGUE.\n"
"When performing a command, make sure to include the three ticks (```) at the top and bottom ```COMMAND\ntext\n``` where COMMAND is the specific command you want to run (e.g. DIALOGUE).\n"
)
"When performing a command, make sure to include the three ticks (```) at the top and bottom ```COMMAND\ntext\n``` where COMMAND is the specific command you want to run (e.g. DIALOGUE).\n")
#elif phase == "report writing":
# return (
# "You can produce dialogue using the following command: ```DIALOGUE\ndialogue here\n```\n where 'dialogue here' is the actual dialogue you will send and DIALOGUE is just the word DIALOGUE.\n"
Expand Down Expand Up @@ -722,7 +755,4 @@ def add_review(self, review, arx_eng):
def format_review(self):
    """Render the collected literature review entries as prompt-ready text."""
    header = "Provided here is a literature review on this topic:\n"
    entry_lines = (
        f"arXiv ID: {entry['arxiv_id']}, Summary: {entry['summary']}"
        for entry in self.lit_review
    )
    return header + "\n".join(entry_lines)



for _l in self.lit_review)
43 changes: 22 additions & 21 deletions ai_lab_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import argparse
import pickle
import google.generativeai as genai # Import Gemini library

DEFAULT_LLM_BACKBONE = "o1-mini"

Expand All @@ -24,7 +25,7 @@ def __init__(self, research_topic, openai_api_key, max_steps=100, num_papers_lit
self.notes = notes
self.max_steps = max_steps
self.compile_pdf = compile_pdf
self.openai_api_key = openai_api_key
self.openai_api_key = openai_api_key # Still using openai_api_key name for legacy reasons, but should be considered general API key - used for both OpenAI and Gemini if selected
self.research_topic = research_topic
self.model_backbone = agent_model_backbone
self.num_papers_lit_review = num_papers_lit_review
Expand Down Expand Up @@ -79,7 +80,7 @@ def __init__(self, research_topic, openai_api_key, max_steps=100, num_papers_lit

self.save = True
self.verbose = True
self.reviewers = ReviewersAgent(model=self.model_backbone, notes=self.notes, openai_api_key=self.openai_api_key)
self.reviewers = ReviewersAgent(model=self.model_backbone, notes=self.notes, openai_api_key=self.openai_api_key) # still using openai_api_key for agents, will update agent class as well
self.phd = PhDStudentAgent(model=self.model_backbone, notes=self.notes, max_steps=self.max_steps, openai_api_key=self.openai_api_key)
self.postdoc = PostdocAgent(model=self.model_backbone, notes=self.notes, max_steps=self.max_steps, openai_api_key=self.openai_api_key)
self.professor = ProfessorAgent(model=self.model_backbone, notes=self.notes, max_steps=self.max_steps, openai_api_key=self.openai_api_key)
Expand Down Expand Up @@ -541,7 +542,8 @@ def parse_arguments():
parser.add_argument(
'--deepseek-api-key',
type=str,
help='Provide the DeepSeek API key.'
default="False", # setting default to False as it's not primarily used in this version
help='Provide the DeepSeek API key. (Not used in this version, for future compatibility)'
)

parser.add_argument(
Expand All @@ -566,7 +568,7 @@ def parse_arguments():
parser.add_argument(
'--api-key',
type=str,
help='Provide the OpenAI API key.'
help='Provide the API key (OpenAI API key or Google API key if using Gemini models). Will prioritize GOOGLE_API_KEY env variable if both are set and Gemini is selected.'
)

parser.add_argument(
Expand All @@ -580,7 +582,7 @@ def parse_arguments():
'--llm-backend',
type=str,
default="o1-mini",
help='Backend LLM to use for agents in Agent Laboratory.'
help='Backend LLM to use for agents in Agent Laboratory. Options: o1-mini, gemini-1.0-pro-latest'
)

parser.add_argument(
Expand Down Expand Up @@ -638,13 +640,22 @@ def parse_arguments():

api_key = os.getenv('OPENAI_API_KEY') or args.api_key
deepseek_api_key = os.getenv('DEEPSEEK_API_KEY') or args.deepseek_api_key
google_api_key = os.getenv('GOOGLE_API_KEY') or args.api_key

if args.api_key is not None and os.getenv('OPENAI_API_KEY') is None:
os.environ["OPENAI_API_KEY"] = args.api_key
if args.deepseek_api_key is not None and os.getenv('DEEPSEEK_API_KEY') is None:
os.environ["DEEPSEEK_API_KEY"] = args.deepseek_api_key
if args.api_key is not None and os.getenv('GOOGLE_API_KEY') is None: # setting google api key as well if openai key is provided as fallback for gemini
os.environ["GOOGLE_API_KEY"] = args.api_key

if llm_backend == "gemini-2.0-flash-thinking-exp-01-21": # check if gemini is selected as backend
if not google_api_key: # if gemini is selected, prioritize GOOGLE_API_KEY env variable or --api-key
raise ValueError("API key must be provided via --api-key or the GOOGLE_API_KEY environment variable when using Gemini models.")
genai.configure(api_key=google_api_key) # configure gemini api with google api key
elif not api_key and not deepseek_api_key: # for o1-mini (and potentially other openai models in the future), require openai key
raise ValueError("API key must be provided via --api-key / -deepseek-api-key or the OPENAI_API_KEY / DEEPSEEK_API_KEY environment variable when using OpenAI models.")

if not api_key and not deepseek_api_key:
raise ValueError("API key must be provided via --api-key / -deepseek-api-key or the OPENAI_API_KEY / DEEPSEEK_API_KEY environment variable.")

##########################################################
# Research question that the agents are going to explore #
Expand All @@ -659,13 +670,10 @@ def parse_arguments():
"note": f"You should come up with a plan for TWO experiments."},

{"phases": ["plan formulation", "data preparation", "running experiments"],
"note": "Please use gpt-4o-mini for your experiments."},

{"phases": ["running experiments"],
"note": f'Use the following code to inference gpt-4o-mini: \nfrom openai import OpenAI\nos.environ["OPENAI_API_KEY"] = "{api_key}"\nclient = OpenAI()\ncompletion = client.chat.completions.create(\nmodel="gpt-4o-mini-2024-07-18", messages=messages)\nanswer = completion.choices[0].message.content\n'},
"note": f"Please use {llm_backend} for your experiments. You have the option to use either 'o1-mini' or 'gemini-1.0-pro-latest' as the backend model."}, # updated note to reflect gemini option

{"phases": ["running experiments"],
"note": f"You have access to only gpt-4o-mini using the OpenAI API, please use the following key {api_key} but do not use too many inferences. Do not use openai.ChatCompletion.create or any openai==0.28 commands. Instead use the provided inference code."},
# Removed OpenAI specific notes, as now we are using Gemini by default
# If you want to use OpenAI models again, you would need to add similar notes but for OpenAI API

{"phases": ["running experiments"],
"note": "I would recommend using a small dataset (approximately only 100 data points) to run experiments in order to save time. Do not use much more than this unless you have to or are running the final tests."},
Expand Down Expand Up @@ -719,18 +727,11 @@ def parse_arguments():
research_topic=research_topic,
notes=task_notes_LLM,
agent_model_backbone=agent_models,
human_in_loop_flag=human_in_loop,
openai_api_key=api_key,
openai_api_key=api_key, # still using openai_api_key, but it's ok as agents will use it as general key now - for both OpenAI and Gemini if selected
compile_pdf=compile_pdf,
num_papers_lit_review=num_papers_lit_review,
papersolver_max_steps=papersolver_max_steps,
mlesolver_max_steps=mlesolver_max_steps,
)

lab.perform_research()






Loading