4 changes: 2 additions & 2 deletions codegen-examples/examples/langchain_agent/run.py
@@ -20,7 +20,7 @@
 
 from langgraph.checkpoint.memory import MemorySaver
 from langgraph.graph.graph import CompiledGraph
-from langgraph.prebuilt import create_react_agent
+from codegen.extensions.langchain.graph import create_react_agent
 from langchain_core.messages import SystemMessage
 
 
@@ -70,7 +70,7 @@ def create_codebase_agent(
 
     memory = MemorySaver() if memory else None
 
-    return create_react_agent(model=llm, tools=tools, prompt=system_message, checkpointer=memory, debug=debug)
+    return create_react_agent(model=llm, tools=tools, system_message=system_message, checkpointer=memory, debug=debug)
 
 
 if __name__ == "__main__":
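For reference, a minimal sketch of how this example's factory is driven after the swap to the custom graph. The repo path and query are placeholders; the `{"query": ...}` input shape and the `final_answer` key follow the custom graph's call sites elsewhere in this PR.

```python
# Hypothetical usage sketch for the updated example; the repo path and
# query are placeholders, not part of this PR.
from codegen import Codebase
from run import create_codebase_agent

codebase = Codebase("path/to/repo")  # assumption: Codebase accepts a local path
agent = create_codebase_agent(codebase)  # returns a CompiledGraph

# The custom graph takes a "query" key and checkpoints per thread_id,
# matching how ChatAgent.run invokes it elsewhere in this PR.
result = agent.invoke(
    {"query": "List all functions in this codebase"},
    config={"configurable": {"thread_id": "demo-thread"}},
)
print(result["final_answer"])
```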
52 changes: 11 additions & 41 deletions codegen-examples/examples/swebench_agent_run/local_run.ipynb
@@ -7,7 +7,14 @@
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
-   "%autoreload 2"
+   "%autoreload 2\n",
+   "\n",
+   "from dotenv import load_dotenv  # type: ignore\n",
+   "\n",
+   "load_dotenv()\n",
+   "\n",
+   "from codegen.extensions.swebench.utils import SWEBenchDataset, get_swe_bench_examples  # noqa: E402\n",
+   "from run_eval import run_eval  # noqa: E402"
   ]
  },
  {
@@ -16,9 +23,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-   "from codegen.sdk.core.codebase import Codebase\n",
-   "from codegen.extensions.swebench.utils import SWEBenchDataset, get_swe_bench_examples\n",
-   "from run_eval import run_eval"
+   "examples = get_swe_bench_examples(dataset=SWEBenchDataset.LITE, split=\"test\", offset=0, length=10)"
   ]
  },
  {
@@ -27,43 +32,8 @@
   "metadata": {},
   "outputs": [],
   "source": [
-   "examples = get_swe_bench_examples(dataset=SWEBenchDataset.LITE, split=\"test\", offset=0, length=1)"
+   "await run_eval(use_existing_preds=None, dataset=\"lite\", length=20, repo=\"django/django\", num_workers=10, model=\"claude-3-7-sonnet-latest\")"
   ]
  },
- {
-  "cell_type": "code",
-  "execution_count": null,
-  "metadata": {},
-  "outputs": [],
-  "source": [
-   "codebase = Codebase.from_repo(examples[0].repo, commit=examples[0].base_commit, tmp_dir=f\"/tmp/{examples[0].instance_id}\")\n",
-   "# this will allow us to reuse the codebase for multiple examples\n",
-   "codebases = {examples[0].instance_id: codebase}"
-  ]
- },
- {
-  "cell_type": "code",
-  "execution_count": null,
-  "metadata": {},
-  "outputs": [],
-  "source": [
-   "await run_eval(use_existing_preds=None, dataset=\"lite\", length=None, instance_id=examples[0].instance_id, local=True, codebases=codebases)\n",
-   "codebases[examples[0].instance_id].reset()"
-  ]
- },
- {
-  "cell_type": "code",
-  "execution_count": null,
-  "metadata": {},
-  "outputs": [],
-  "source": []
- },
- {
-  "cell_type": "code",
-  "execution_count": null,
-  "metadata": {},
-  "outputs": [],
-  "source": []
- }
 ],
 "metadata": {
@@ -82,7 +52,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.13.1"
+   "version": "3.13.0"
  }
 },
 "nbformat": 4,
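The rewritten notebook relies on IPython's top-level `await`. A sketch of the same evaluation as a plain script, assuming `run_eval` is a coroutine with the keyword arguments used in the cell above:

```python
# Script-mode sketch of the notebook cell above; outside IPython,
# top-level await is unavailable, so we drive the coroutine with asyncio.
import asyncio

from dotenv import load_dotenv
from run_eval import run_eval

load_dotenv()  # pick up API keys, as in the notebook

asyncio.run(
    run_eval(
        use_existing_preds=None,
        dataset="lite",
        length=20,
        repo="django/django",
        num_workers=10,
        model="claude-3-7-sonnet-latest",
    )
)
```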
1 change: 1 addition & 0 deletions pyproject.toml
@@ -10,6 +10,7 @@ dependencies = [
     "tiktoken<1.0.0,>=0.5.1",
     "tabulate>=0.9.0,<1.0.0",
     "codeowners<1.0.0,>=0.6.0",
+    "anthropic",
     "dataclasses-json<1.0.0,>=0.6.4",
     "dicttoxml<2.0.0,>=1.7.16",
     "xmltodict<1.0.0,>=0.13.0",
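The new (unpinned) `anthropic` dependency backs the Claude defaults used by the agents below. A minimal smoke test, assuming `ANTHROPIC_API_KEY` is set in the environment:

```python
# Minimal smoke test for the new dependency; assumes ANTHROPIC_API_KEY
# is exported in the environment.
import anthropic

client = anthropic.Anthropic()
reply = client.messages.create(
    model="claude-3-5-sonnet-latest",
    max_tokens=64,
    messages=[{"role": "user", "content": "Reply with the word: ok"}],
)
print(reply.content[0].text)
```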
95 changes: 95 additions & 0 deletions src/codegen/agents/chat_agent.py
@@ -0,0 +1,95 @@
+from typing import TYPE_CHECKING, Optional
+from uuid import uuid4
+
+from langchain.tools import BaseTool
+from langchain_core.messages import AIMessage
+
+from codegen.extensions.langchain.agent import create_chat_agent
+
+if TYPE_CHECKING:
+    from codegen import Codebase
+
+
+class ChatAgent:
+    """Agent for interacting with a codebase."""
+
+    def __init__(self, codebase: "Codebase", model_provider: str = "anthropic", model_name: str = "claude-3-5-sonnet-latest", memory: bool = True, tools: Optional[list[BaseTool]] = None, **kwargs):
+        """Initialize a ChatAgent.
+
+        Args:
+            codebase: The codebase to operate on
+            model_provider: The model provider to use ("anthropic" or "openai")
+            model_name: Name of the model to use
+            memory: Whether to let the LLM keep track of the conversation history
+            tools: Additional tools to use
+            **kwargs: Additional LLM configuration options. Supported options:
+                - temperature: Temperature parameter (0-1)
+                - top_p: Top-p sampling parameter (0-1)
+                - top_k: Top-k sampling parameter (>= 1)
+                - max_tokens: Maximum number of tokens to generate
+        """
+        self.codebase = codebase
+        self.agent = create_chat_agent(self.codebase, model_provider=model_provider, model_name=model_name, memory=memory, additional_tools=tools, **kwargs)
+
+    def run(self, prompt: str, thread_id: Optional[str] = None) -> str:
+        """Run the agent with a prompt.
+
+        Args:
+            prompt: The prompt to run
+            thread_id: Optional thread ID for message history. If None, a new thread is created.
+
+        Returns:
+            The agent's response
+        """
+        if thread_id is None:
+            thread_id = str(uuid4())
+
+        input = {"query": prompt}
+        stream = self.agent.stream(input, config={"configurable": {"thread_id": thread_id}}, stream_mode="values")
+
+        for s in stream:
+            message = s["messages"][-1]
+            if isinstance(message, tuple):
+                print(message)
+            else:
+                if isinstance(message, AIMessage) and isinstance(message.content, list) and "text" in message.content[0]:
+                    AIMessage(message.content[0]["text"]).pretty_print()
+                else:
+                    message.pretty_print()
+
+        return s["final_answer"]
+
+    def chat(self, prompt: str, thread_id: Optional[str] = None) -> tuple[str, str]:
+        """Chat with the agent, maintaining conversation history.
+
+        Args:
+            prompt: The user message
+            thread_id: Optional thread ID for message history. If None, a new thread is created.
+
+        Returns:
+            A tuple of (response_content, thread_id) to allow continued conversation
+        """
+        if thread_id is None:
+            thread_id = str(uuid4())
+            print(f"Starting new chat thread: {thread_id}")
+        else:
+            print(f"Continuing chat thread: {thread_id}")
+
+        response = self.run(prompt, thread_id=thread_id)
+        return response, thread_id
+
+    def get_chat_history(self, thread_id: str) -> list:
+        """Retrieve the chat history for a specific thread.
+
+        Args:
+            thread_id: The thread ID to retrieve history for
+
+        Returns:
+            List of messages in the conversation history
+        """
+        # Access the agent's memory to get conversation history
+        if hasattr(self.agent, "get_state"):
+            state = self.agent.get_state({"configurable": {"thread_id": thread_id}})
+            if state and "messages" in state:
+                return state["messages"]
+        return []
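A short usage sketch for the new `ChatAgent`. The `Codebase` construction is an assumption for illustration; everything else follows the class above.

```python
# Usage sketch for ChatAgent; the Codebase path below is a placeholder.
from codegen import Codebase
from codegen.agents.chat_agent import ChatAgent

codebase = Codebase("path/to/repo")  # assumption: Codebase accepts a local path
agent = ChatAgent(codebase, model_provider="anthropic", temperature=0)

# First call starts a new thread; reuse the returned thread_id to continue it.
answer, thread_id = agent.chat("What does src/codegen/agents contain?")
follow_up, _ = agent.chat("Now summarize that in one sentence.", thread_id=thread_id)

# Inspect the accumulated conversation for the thread.
for message in agent.get_chat_history(thread_id):
    print(message)
```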
23 changes: 18 additions & 5 deletions src/codegen/agents/code_agent.py
@@ -3,7 +3,7 @@
 from uuid import uuid4
 
 from langchain.tools import BaseTool
-from langchain_core.messages import AIMessage
+from langchain_core.messages import AIMessage, HumanMessage
 from langchain_core.runnables.config import RunnableConfig
 from langsmith import Client
 
@@ -94,8 +94,17 @@ def run(self, prompt: str, thread_id: Optional[str] = None) -> str:
 
         # this message has a reducer which appends the current message to the existing history
         # see more https://langchain-ai.github.io/langgraph/concepts/low_level/#reducers
-        input = {"messages": [("user", prompt)]}
-        tags, metadata = self.get_tags_metadata()
+        input = {"query": prompt}
+        metadata = {"project": self.project_name}
+        tags = []
+        # Add SWEBench run ID and instance ID to the metadata and tags for filtering
+        if self.run_id is not None:
+            metadata["swebench_run_id"] = self.run_id
+            tags.append(self.run_id)
+
+        if self.instance_id is not None:
+            metadata["swebench_instance_id"] = self.instance_id
+            tags.append(self.instance_id)
 
         config = RunnableConfig(configurable={"thread_id": thread_id}, tags=tags, metadata=metadata, recursion_limit=100)
         # we stream the steps instead of invoke because it allows us to access intermediate nodes
@@ -105,7 +114,11 @@
         run_ids = []
 
         for s in stream:
-            message = s["messages"][-1]
+            if len(s["messages"]) == 0:
+                message = HumanMessage(content=prompt)
+            else:
+                message = s["messages"][-1]
+
             if isinstance(message, tuple):
                 print(message)
             else:
@@ -119,7 +132,7 @@ def run(self, prompt: str, thread_id: Optional[str] = None) -> str:
                     run_ids.append(message.additional_kwargs["run_id"])
 
         # Get the last message content
-        result = s["messages"][-1].content
+        result = s["final_answer"]
 
         # Try to find run IDs in the LangSmith client's recent runs
        try:
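The tags and metadata added in `run()` make SWEBench runs queryable in LangSmith. A hedged sketch of pulling them back, with the project name and run ID as placeholders (the filter string uses LangSmith's standard query syntax):

```python
# Sketch: retrieve SWEBench runs by the tags attached in run() above.
# Project name and run ID are placeholders; filter syntax is LangSmith's.
from langsmith import Client

client = Client()
runs = client.list_runs(
    project_name="my-project",               # assumption: matches self.project_name
    filter='has(tags, "swebench-run-123")',  # self.run_id is appended to tags above
)
for run in runs:
    metadata = (run.extra or {}).get("metadata", {})
    print(run.id, metadata.get("swebench_instance_id"))
```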
17 changes: 9 additions & 8 deletions src/codegen/extensions/langchain/agent.py
@@ -6,11 +6,10 @@
 from langchain_core.messages import SystemMessage
 from langgraph.checkpoint.memory import MemorySaver
 from langgraph.graph.graph import CompiledGraph
-from langgraph.prebuilt import create_react_agent
 
-from .llm import LLM
-from .prompts import REASONER_SYSTEM_MESSAGE
-from .tools import (
+from codegen.extensions.langchain.llm import LLM
+from codegen.extensions.langchain.prompts import REASONER_SYSTEM_MESSAGE
+from codegen.extensions.langchain.tools import (
     CreateFileTool,
     DeleteFileTool,
     ListDirectoryTool,
@@ -25,6 +24,8 @@
     ViewFileTool,
 )
 
+from .graph import create_react_agent
+
 if TYPE_CHECKING:
     from codegen import Codebase
 
@@ -88,7 +89,7 @@ def create_codebase_agent(
 
     memory = MemorySaver() if memory else None
 
-    return create_react_agent(model=llm, tools=tools, prompt=system_message, checkpointer=memory, debug=debug)
+    return create_react_agent(model=llm, tools=tools, system_message=system_message, checkpointer=memory, debug=debug)
 
 
 def create_chat_agent(
@@ -137,7 +138,7 @@ def create_chat_agent(
 
     memory = MemorySaver() if memory else None
 
-    return create_react_agent(model=llm, tools=tools, prompt=system_message, checkpointer=memory, debug=debug)
+    return create_react_agent(model=llm, tools=tools, system_message=system_message, checkpointer=memory, debug=debug)
 
 
 def create_codebase_inspector_agent(
@@ -174,7 +175,7 @@ def create_codebase_inspector_agent(
     ]
 
     memory = MemorySaver() if memory else None
-    return create_react_agent(model=llm, tools=tools, prompt=system_message, checkpointer=memory, debug=debug)
+    return create_react_agent(model=llm, tools=tools, system_message=system_message, checkpointer=memory, debug=debug)
 
 
 def create_agent_with_tools(
@@ -208,4 +209,4 @@ def create_agent_with_tools(
 
     memory = MemorySaver() if memory else None
 
-    return create_react_agent(model=llm, tools=tools, prompt=system_message, checkpointer=memory, debug=debug)
+    return create_react_agent(model=llm, tools=tools, system_message=system_message, checkpointer=memory, debug=debug)
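For orientation, the contract of the custom `create_react_agent` can be inferred from the call sites above. A signature-only sketch; the real implementation lives in `codegen.extensions.langchain.graph` and builds the LangGraph state machine:

```python
# Signature sketch inferred from the call sites above; this stub only
# documents the contract, not the real implementation in .graph.
from typing import Optional

from langchain.tools import BaseTool
from langchain_core.language_models import BaseChatModel
from langchain_core.messages import SystemMessage
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph.graph import CompiledGraph


def create_react_agent(
    model: BaseChatModel,
    tools: list[BaseTool],
    system_message: SystemMessage,
    checkpointer: Optional[MemorySaver] = None,
    debug: bool = False,
) -> CompiledGraph:
    """Build the ReAct-style graph used by the agents above (stub)."""
    raise NotImplementedError("sketch only; see codegen.extensions.langchain.graph")
```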