# -------------------------------
# 1. Define the Python tool
# -------------------------------
import io
import random
import sys
from typing import List

import ray
import torch
from langchain_core.language_models import BaseChatModel
from langchain_core.messages import AIMessage, BaseMessage
from langchain_core.outputs import ChatGeneration, ChatResult
from langchain_core.prompts import PromptTemplate
from langchain_core.tools import tool
from langgraph.checkpoint.memory import MemorySaver
from langgraph.prebuilt import create_react_agent
from tool_calling_llm import ToolCallingLLM
from transformers import AutoTokenizer

SYSTEM_PROMPT_TEMPLATE = """{task_description}. You have access to the following tools:

{tools}

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin!

Question: {input}
Thought:{agent_scratchpad}"""

SYSTEM_PROMPT = PromptTemplate.from_template(SYSTEM_PROMPT_TEMPLATE)

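# Example of rendering the prompt above (illustrative values). Note that SYSTEM_PROMPT is not
# wired into the agent built below, which instead constructs its own tool-describing system
# message via ToolCallingLLM:
#
#     SYSTEM_PROMPT.format(
#         task_description="You are a helpful math assistant",
#         tools="python: executes a string of Python code and returns the printed output",
#         tool_names="python",
#         input="What is the least common multiple of 18 and 24?",
#         agent_scratchpad="",
#     )
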
class Capturing(list):
    """Capture stdout prints produced inside exec()."""

    def __enter__(self):
        self._stdout = sys.stdout
        sys.stdout = self._stringio = io.StringIO()
        return self

    def __exit__(self, *args):
        self.extend(self._stringio.getvalue().splitlines())
        sys.stdout = self._stdout

@tool
def python(code: str) -> str:
    """
    Execute a string of Python code and return its printed output.
    The code must print its result and must import every library it uses.
    """
    local_vars = {}
    with Capturing() as output:
        exec(code, {}, local_vars)
    if not output:
        return "Error: No output printed from the code. Please ensure you print the output."
    return "\n".join(output)

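# Quick sanity check for the tool (illustrative; `math.lcm` requires Python >= 3.9):
#
#     result = python.invoke({"code": "import math\nprint(math.lcm(18, 24))"})
#     print(result)  # expected output: "72"
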
# -------------------------------
# 2. Define a Custom API LLM wrapper
# -------------------------------
class CustomOpenAIAPILLM:
    def __init__(self, cfg: dict, producer_idx, generation_workers=None):
        self.producer_idx = producer_idx
        # Ray actor handles exposing `get_producer_load` and `generate` remote methods.
        self.generation_workers = generation_workers
        self.load_balancer_idx = producer_idx % len(self.generation_workers)
        assert "model" in cfg, "Please specify the model name in the config"
        self.tokenizer = AutoTokenizer.from_pretrained(cfg["model"])
        # Map LangChain message types onto the chat-template roles expected by the tokenizer.
        self.role_mapping = {
            "system": "system",
            "user": "user",
            "assistant": "assistant",
            "human": "user",
            "tool": "tool",
        }

    def invoke(self, messages: List[BaseMessage], **kwargs) -> str:
        """
        messages: list of LangChain messages; each message's `.type` is mapped to a chat role.
        """
        # Load balancing: route the request to the generation worker with the lowest load.
        load = [ray.get(generation_worker.get_producer_load.remote()) for generation_worker in self.generation_workers]
        min_load = min(load)
        candidates = [i for i, l in enumerate(load) if l == min_load]
        # Break ties randomly.
        self.load_balancer_idx = random.choice(candidates)
        generation_worker = self.generation_workers[self.load_balancer_idx]
        # Convert LangChain messages into the plain role/content dicts expected by the chat template.
        transformer_messages = []
        for message in messages:
            transformer_messages.append({"role": self.role_mapping[message.type], "content": message.content})
        input_ids = self.tokenizer.apply_chat_template(
            transformer_messages, return_tensors="pt", tokenize=True, add_generation_prompt=True
        )
        attention_mask = torch.ones_like(input_ids)
        rollouts = ray.get(generation_worker.generate.remote(input_ids, attention_mask, **kwargs))
        # Decode only the newly generated tokens (everything after the prompt).
        response = self.tokenizer.batch_decode(
            rollouts["input_ids"][0][:, input_ids.size(-1) :], skip_special_tokens=True
        )[0]
        return response

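# How this wrapper is expected to be constructed (a sketch; `GenerationWorker` and the cfg
# layout are hypothetical and depend on the surrounding RL training framework, which owns
# the Ray generation workers):
#
#     workers = [GenerationWorker.remote(...) for _ in range(4)]
#     client = CustomOpenAIAPILLM(
#         cfg={"model": "Qwen/Qwen2.5-7B-Instruct"},  # for example: any HF model with a chat template
#         producer_idx=0,
#         generation_workers=workers,
#     )
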
class LangChainCustomLLM(ToolCallingLLM, BaseChatModel):
    client: CustomOpenAIAPILLM = None

    def __init__(self, client: CustomOpenAIAPILLM):
        super().__init__()
        self.client = client

    def _generate(self, messages, stop=None, run_manager=None, **kwargs):
        # Build the tool-describing system message and the function schemas from the bound tools.
        system_message, functions = self._generate_system_message_and_functions(kwargs)
        sample_params = {"stop": stop} if stop is not None else {}
        sample_params.update({k: v for k, v in kwargs.items() if k in ["temperature", "top_p", "top_k", "max_tokens"]})
        response_message = self.client.invoke(  # type: ignore[safe-super]
            [system_message] + messages, sample_params=sample_params
        )
        # Parse tool calls (if any) out of the raw text completion.
        response = self._process_response(AIMessage(content=response_message), functions)
        return ChatResult(generations=[ChatGeneration(message=response)])

    @property
    def _llm_type(self) -> str:
        return "custom-api-llm"

# -------------------------------
# 3. Build a ReAct Agent with LangGraph
# -------------------------------
def build_agent(cfg: dict, producer_idx: int = 0, generation_workers=None):
    # Wrap the custom API LLM in a LangChain-compatible chat model interface
    llm_client = CustomOpenAIAPILLM(cfg, producer_idx, generation_workers)
    llm = LangChainCustomLLM(llm_client)

    # Tools
    tools = [python]

    # Memory (optional)
    memory = MemorySaver()

    # Build ReAct agent
    agent = create_react_agent(llm, tools, checkpointer=memory)
    return agent

# -------------------------------
# 4. Run the agent on a math problem
# -------------------------------
if __name__ == "__main__":
    # NOTE: placeholders below. `cfg["model"]` must name a HF checkpoint with a chat template,
    # and `generation_workers` must be the Ray generation-worker actor handles created by the
    # surrounding training framework.
    agent = build_agent(
        cfg={"model": "your-hf-model-id"},
        producer_idx=0,
        generation_workers=None,  # replace with real Ray actor handles
    )

    # Example math question
    user_input = "What is the least common multiple of 18 and 24? Use Python if needed."

    config = {"configurable": {"thread_id": "math-1"}}
    for event in agent.stream({"messages": [("user", user_input)]}, config):
        if "agent" in event:
            print("Agent event:", event["agent"]["messages"][-1].content)
        elif "tools" in event:
            print("Tool event:", event["tools"]["messages"][-1].content)

    # get_state() returns a StateSnapshot; the channel values live under `.values`.
    final_state = agent.get_state(config)
    print("Final Answer:", final_state.values["messages"][-1].content)
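    # Alternative to streaming (a sketch): run the whole graph in one call and read the final message.
    #     result = agent.invoke({"messages": [("user", user_input)]}, config)
    #     print(result["messages"][-1].content)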