Skip to content

Commit 2746240

Browse files
committed
feat(cohere): auto trace cohere
1 parent 5ed5c09 commit 2746240

File tree

5 files changed

+236
-97
lines changed

5 files changed

+236
-97
lines changed

cookbook/cohere/trace_cohere.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,5 +40,26 @@
4040
print(response)
4141
print("\n\n")
4242

43+
44+
response = co.chat(
45+
model="command-r-plus",
46+
message="Where do the tallest penguins live?",
47+
documents=[
48+
{"title": "Tall penguins", "snippet": "Emperor penguins are the tallest."},
49+
{"title": "Penguin habitats", "snippet": "Emperor penguins only live in Antarctica."},
50+
{"title": "What are animals?", "snippet": "Animals are different from plants."},
51+
],
52+
)
53+
print(response)
54+
print("\n\n")
55+
56+
response = co.chat(model="command-r-plus", message="Who is more popular: Nsync or Backstreet Boys?", search_queries_only=True)
57+
print(response)
58+
print("\n\n")
59+
60+
response = co.chat(model="command-r-plus", message="Who is more popular: Nsync or Backstreet Boys?", connectors=[{"id": "web-search"}])
61+
print(response)
62+
print("\n\n")
63+
4364
for event in co.chat_stream(message="Who discovered gravity?"):
4465
print(event)
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
import os
2+
3+
import cohere
4+
from dotenv import load_dotenv
5+
6+
from parea import Parea
7+
from parea.utils.universal_encoder import json_dumps
8+
9+
load_dotenv()
10+
11+
p = Parea(api_key=os.getenv("PAREA_API_KEY"))
12+
co = cohere.Client(api_key=os.getenv("COHERE_API_KEY"))
13+
p.wrap_cohere_client(co)
14+
15+
16+
def web_search(query: str) -> list[dict]:
    """Stub tool implementation: return canned web-search results for *query*.

    Replace the body with a real search call. The model only ever sees the
    returned list of ``{"url": ..., "text": ...}`` dicts, which are fed back
    as tool outputs.
    """
    # your code for performing a web search goes here
    return [{"url": "https://en.wikipedia.org/wiki/Ontario", "text": "The capital of Ontario is Toronto, ..."}]
19+
20+
21+
web_search_tool = {
22+
"name": "web_search",
23+
"description": "performs a web search with the specified query",
24+
"parameter_definitions": {"query": {"description": "the query to look up", "type": "str", "required": True}},
25+
}
26+
27+
message = "Who is the mayor of the capital of Ontario?"
28+
model = "command-r-plus"
29+
30+
# STEP 2: Check what tools the model wants to use and how
31+
32+
res = co.chat(model=model, message=message, force_single_step=False, tools=[web_search_tool])
33+
34+
# as long as the model sends back tool_calls,
35+
# keep invoking tools and sending the results back to the model
36+
while res.tool_calls:
37+
print(res.text) # This will be an observation and a plan with next steps
38+
tool_results = []
39+
for call in res.tool_calls:
40+
# use the `web_search` tool with the search query the model sent back
41+
web_search_results = {"call": call, "outputs": web_search(call.parameters["query"])}
42+
tool_results.append(web_search_results)
43+
44+
# call chat again with tool results
45+
res = co.chat(model="command-r-plus", chat_history=res.chat_history, message="", force_single_step=False, tools=[web_search_tool], tool_results=tool_results)
46+
47+
print(res.text) # "The mayor of Toronto, the capital of Ontario is Olivia Chow"
48+
49+
50+
# tool descriptions that the model has access to
tools = [
    {
        "name": "query_daily_sales_report",
        "description": "Connects to a database to retrieve overall sales volumes and sales information for a given day.",
        "parameter_definitions": {"day": {"description": "Retrieves sales data for this day, formatted as YYYY-MM-DD.", "type": "str", "required": True}},
    },
    {
        "name": "query_product_catalog",
        # fixed doubled article: "a a product catalog" -> "a product catalog"
        "description": "Connects to a product catalog with information about all the products being sold, including categories, prices, and stock levels.",
        "parameter_definitions": {"category": {"description": "Retrieves product information data for all products in this category.", "type": "str", "required": True}},
    },
]
63+
64+
# preamble containing instructions about the task and the desired style for the output.
65+
preamble = """
66+
## Task & Context
67+
You help people answer their questions and other requests interactively. You will be asked a very wide array of requests on all kinds of topics. You will be equipped with a wide range of search engines or similar tools to help you, which you use to research your answer. You should focus on serving the user's needs as best you can, which will be wide-ranging.
68+
69+
## Style Guide
70+
Unless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling.
71+
"""
72+
73+
# user request
74+
message = "Can you provide a sales summary for 29th September 2023, and also give me some details about the products in the 'Electronics' category, for example their prices and stock levels?"
75+
76+
response = co.chat(message=message, force_single_step=True, tools=tools, preamble=preamble, model="command-r")
77+
print("The model recommends doing the following tool calls:")
78+
print("\n".join(str(tool_call) for tool_call in response.tool_calls))
79+
80+
tool_results = []
81+
# Iterate over the tool calls generated by the model
82+
for tool_call in response.tool_calls:
83+
# here is where you would call the tool recommended by the model, using the parameters recommended by the model
84+
output = {"output": f"functions_map[{tool_call.name}]({tool_call.parameters})"}
85+
# store the output in a list
86+
outputs = [output]
87+
# store your tool results in this format
88+
tool_results.append({"call": tool_call, "outputs": outputs})
89+
90+
91+
print("Tool results that will be fed back to the model in step 4:")
92+
print(json_dumps(tool_results, indent=4))
93+
94+
response = co.chat(message=message, tools=tools, tool_results=tool_results, preamble=preamble, model="command-r", temperature=0.3, force_single_step=True)
95+
96+
97+
print("Final answer:")
98+
print(response.text)

cookbook/langchain/trace_class_call_method.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,6 @@
1212

1313
p = Parea(api_key=os.getenv("PAREA_API_KEY"))
1414

15-
from langsmith.evaluation import LangChainStringEvaluator
16-
17-
qa_evaluator = [LangChainStringEvaluator("cot_qa")]
18-
1915

2016
class LangChainModule:
2117
handler = PareaAILangchainTracer()

parea/wrapper/cohere/helpers.py

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
from typing import Any, Dict, List, Optional, Tuple, Union
2+
3+
import functools
4+
5+
import cohere
6+
from attrs import asdict, define
7+
from cohere import ApiMetaBilledUnits, NonStreamedChatResponse, RerankResponse
8+
9+
from parea.constants import COHERE_MODEL_INFO, COHERE_SEARCH_MODELS
10+
from parea.schemas import Message, Role
11+
from parea.utils.universal_encoder import json_dumps
12+
13+
DEFAULT_MODEL = "command-r-plus"
14+
DEFAULT_TEMPERATURE = 0.3
15+
DEFAULT_P = 0.75
16+
17+
18+
@define
class CohereOutput:
    """Normalized, trace-loggable view of a Cohere chat or rerank response.

    Each field is a JSON-encoded string (or None when absent from the
    response); the whole object is itself serialized with ``json_dumps``
    before being attached to a trace.
    """

    text: Optional[str] = None  # final answer text, or serialized tool calls when there is no text
    citations: Optional[str] = None  # JSON list of citation objects
    documents: Optional[str] = None  # JSON list of documents (rerank results for RerankResponse)
    search_queries: Optional[str] = None  # JSON list of generated search queries
    search_results: Optional[str] = None  # JSON list of connector search results
25+
26+
27+
def chat_history_to_messages(result: NonStreamedChatResponse, **kwargs) -> list[Message]:
    """Assemble the full conversation as parea Messages.

    Order: optional system preamble, any caller-supplied chat history, then
    the history carried on the response itself.
    """
    messages: list[Message] = []

    preamble = kwargs.get("preamble", "")
    if preamble:
        messages.append(Message(content=preamble, role=Role.system))

    prior_history = kwargs.get("chat_history", [])
    if prior_history:
        messages.extend(to_messages(prior_history))

    messages.extend(to_messages([m.dict() for m in result.chat_history]))
    return messages
36+
37+
38+
def to_messages(chat_history: List[Union[Dict, cohere.Message]]) -> List[Message]:
    """Convert Cohere chat-history entries (plain dicts or cohere.Message objects) to parea Messages."""
    # Map Cohere role names onto parea roles; unknown roles fall back to user.
    role_map = {"USER": Role.user, "CHATBOT": Role.assistant, "SYSTEM": Role.system, "TOOL": Role.tool}

    def process_message(message: Union[Dict, cohere.Message]) -> Message:
        # Entries may arrive as dicts (from kwargs) or SDK Message objects
        # (from result.chat_history); normalize both shapes here.
        if isinstance(message, dict):
            role = role_map.get(message["role"], Role.user)
            content = message.get("message", "")
            tool_calls = message.get("tool_calls") or message.get("tool_results")
        else:  # cohere.Message
            role = role_map.get(message.role, Role.user)
            # TOOL messages carry their payload in tool_results, not .message.
            content = "" if role == Role.tool else message.message
            tool_calls = getattr(message, "tool_calls", None) or getattr(message, "tool_results", None)

        if tool_calls:
            # Serialize tool calls/results; entries may be dicts or objects with .dict().
            tc = json_dumps([t.dict() if hasattr(t, "dict") else t for t in tool_calls])
            # NOTE(review): `tc` is already a JSON string, so wrapping it in another
            # json_dumps double-encodes it inside {"message", "tool_calls"} — confirm intended.
            content = tc if role == Role.tool or not content else json_dumps({"message": content, "tool_calls": tc})

        return Message(content=content, role=role)

    return list(map(process_message, chat_history))
58+
59+
60+
@functools.lru_cache(maxsize=128)
def compute_cost(prompt_tokens: int, completion_tokens: int, search_units: int, is_search_model: bool, model: str) -> float:
    """Return the USD cost of a Cohere call from its billed units.

    Token prices in COHERE_MODEL_INFO are per 1M tokens; search pricing is per
    1K search units and only applies to search models. Unknown models (or
    missing price entries) are treated as free rather than raising.
    """
    cost_per_token = COHERE_MODEL_INFO.get(model, {})
    # Use .get for every key: a model entry present in COHERE_MODEL_INFO but
    # missing "prompt"/"completion" previously raised KeyError.
    cost = (prompt_tokens * cost_per_token.get("prompt", 0) + completion_tokens * cost_per_token.get("completion", 0)) / 1_000_000
    if is_search_model:
        cost += search_units * cost_per_token.get("search", 0) / 1_000
    # Round to avoid float noise in logged costs.
    return round(cost, 10)
68+
69+
70+
def get_usage_stats(result: Optional[NonStreamedChatResponse | RerankResponse], model: str) -> Tuple[int, int, float]:
    """Extract (prompt_tokens, completion_tokens, cost) from a Cohere response.

    Returns (0, 0, 0.0) when the response is missing, has no meta, or has no
    billed units.
    """
    # Guard both result and result.meta: previously a response with meta=None
    # raised AttributeError.
    bu: Optional[ApiMetaBilledUnits] = result.meta.billed_units if result and result.meta else None
    if not bu:
        return 0, 0, 0.0
    prompt_tokens = bu.input_tokens or 0
    completion_tokens = bu.output_tokens or 0
    search_units = bu.search_units or 0
    is_search_model: bool = model in COHERE_SEARCH_MODELS
    cost = compute_cost(prompt_tokens, completion_tokens, search_units, is_search_model, model)
    return prompt_tokens, completion_tokens, cost
80+
81+
82+
def get_output(result: Optional[NonStreamedChatResponse | RerankResponse]) -> str:
    """Serialize a Cohere chat or rerank response into a JSON string for trace logs."""
    if not result:
        return ""

    # Rerank responses only carry ranked documents.
    if isinstance(result, RerankResponse):
        ranked = cohere_json_list(result.results) if result.results else None
        return json_dumps(asdict(CohereOutput(documents=ranked)))

    def encode_if_present(items) -> Optional[str]:
        return cohere_json_list(items) if items else None

    # Fall back to serialized tool calls when the response has no text.
    answer = result.text or cohere_json_list(result.tool_calls)
    payload = CohereOutput(
        text=answer,
        citations=encode_if_present(result.citations),
        documents=encode_if_present(result.documents),
        search_queries=encode_if_present(result.search_queries),
        search_results=encode_if_present(result.search_results),
    )
    return json_dumps(asdict(payload))
99+
100+
101+
def cohere_json_list(obj: Any) -> str:
    """JSON-encode an iterable of items, converting non-dict entries via their .dict() method.

    A None/empty input encodes as "[]".
    """
    normalized = [item if isinstance(item, dict) else item.dict() for item in (obj or [])]
    return json_dumps(normalized)

parea/wrapper/cohere/wrap_cohere.py

Lines changed: 9 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -11,30 +11,17 @@
1111
import traceback
1212

1313
import cohere
14-
from attrs import asdict, define
15-
from cohere import ApiMetaBilledUnits, NonStreamedChatResponse, RerankResponse
14+
from cohere import NonStreamedChatResponse, RerankResponse
1615

17-
from parea.constants import COHERE_MODEL_INFO, COHERE_SEARCH_MODELS, PAREA_OS_ENV_EXPERIMENT_UUID
16+
from parea.constants import PAREA_OS_ENV_EXPERIMENT_UUID
1817
from parea.helpers import gen_trace_id, is_logging_disabled, timezone_aware_now
19-
from parea.schemas import LLMInputs, Message, ModelParams, Role, TraceLog, UpdateTraceScenario
18+
from parea.schemas import LLMInputs, ModelParams, TraceLog, UpdateTraceScenario
2019
from parea.utils.trace_utils import execution_order_counters, fill_trace_data, logger_record_log, trace_context, trace_data
2120
from parea.utils.universal_encoder import json_dumps
21+
from parea.wrapper.cohere.helpers import DEFAULT_MODEL, DEFAULT_P, DEFAULT_TEMPERATURE, chat_history_to_messages, get_output, get_usage_stats
2222

2323
logger = logging.getLogger()
2424

25-
DEFAULT_MODEL = "command-r-plus"
26-
DEFAULT_TEMPERATURE = 0.3
27-
DEFAULT_P = 0.75
28-
29-
30-
@define
31-
class CohereOutput:
32-
text: Optional[str] = None
33-
citations: Optional[str] = None
34-
documents: Optional[str] = None
35-
search_queries: Optional[str] = None
36-
search_results: Optional[str] = None
37-
3825

3926
class CohereClientWrapper:
4027
@staticmethod
@@ -201,7 +188,7 @@ def _fill_llm_config(trace_id: str, result: Optional[NonStreamedChatResponse | R
201188
"""
202189
try:
203190
model = kwargs.get("model", DEFAULT_MODEL)
204-
tools = kwargs.get("tools")
191+
tools = kwargs.get("tools", None)
205192
configuration = LLMInputs(
206193
model=model,
207194
provider="cohere",
@@ -213,13 +200,13 @@ def _fill_llm_config(trace_id: str, result: Optional[NonStreamedChatResponse | R
213200
max_length=kwargs.get("max_tokens"),
214201
response_format=kwargs.get("response_format"),
215202
),
216-
messages=CohereClientWrapper._chat_history_to_messages(result, **kwargs) if isinstance(result, NonStreamedChatResponse) else None,
217-
functions=json_dumps(tools) if tools else None,
203+
messages=chat_history_to_messages(result, **kwargs) if isinstance(result, NonStreamedChatResponse) else None,
204+
functions=tools,
218205
)
219-
prompt_tokens, completion_tokens, cost = CohereClientWrapper._get_usage_stats(result, model)
206+
prompt_tokens, completion_tokens, cost = get_usage_stats(result, model)
220207
data = {
221208
"configuration": configuration,
222-
"output": CohereClientWrapper._get_output(result),
209+
"output": get_output(result),
223210
"input_tokens": prompt_tokens,
224211
"output_tokens": completion_tokens,
225212
"total_tokens": prompt_tokens + completion_tokens,
@@ -230,77 +217,6 @@ def _fill_llm_config(trace_id: str, result: Optional[NonStreamedChatResponse | R
230217
logger.debug(f"Error occurred filling LLM config for trace {trace_id}, {e}", exc_info=True)
231218
fill_trace_data(trace_id, {"error": traceback.format_exc()}, UpdateTraceScenario.ERROR)
232219

233-
@staticmethod
234-
def _chat_history_to_messages(result: NonStreamedChatResponse, **kwargs) -> list[Message]:
235-
messages: list[Message] = []
236-
if sys_message := kwargs.get("preamble", ""):
237-
messages.append(Message(content=sys_message, role=Role.system))
238-
if history := kwargs.get("chat_history", []):
239-
messages.extend(CohereClientWrapper._to_messages(history))
240-
241-
messages.extend(CohereClientWrapper._to_messages([m.dict() for m in result.chat_history]))
242-
return messages
243-
244-
@staticmethod
245-
def _to_messages(chat_history: list[dict]) -> list[Message]:
246-
messages: list[Message] = []
247-
for message in chat_history:
248-
if message["role"] == "USER":
249-
messages.append(Message(content=message["message"], role=Role.user))
250-
elif message["role"] == "CHATBOT":
251-
messages.append(Message(content=message["message"], role=Role.assistant))
252-
elif message["role"] == "SYSTEM":
253-
messages.append(Message(content=message["message"], role=Role.system))
254-
elif message["role"] == "TOOL":
255-
messages.append(Message(content=json_dumps(message["tool_calls"]), role=Role.tool))
256-
257-
return messages
258-
259-
@staticmethod
260-
@functools.lru_cache(maxsize=128)
261-
def _compute_cost(prompt_tokens: int, completion_tokens: int, search_units: int, is_search_model: bool, model: str) -> float:
262-
cost_per_token = COHERE_MODEL_INFO.get(model, {"prompt": 0, "completion": 0})
263-
cost = ((prompt_tokens * cost_per_token["prompt"]) + (completion_tokens * cost_per_token["completion"])) / 1_000_000
264-
if is_search_model:
265-
cost += search_units * cost_per_token.get("search", 0) / 1_000
266-
cost = round(cost, 10)
267-
return cost
268-
269-
@staticmethod
270-
def _get_usage_stats(result: Optional[NonStreamedChatResponse | RerankResponse], model: str) -> Tuple[int, int, float]:
271-
bu: Optional[ApiMetaBilledUnits] = result.meta.billed_units if result else None
272-
if not bu:
273-
return 0, 0, 0.0
274-
prompt_tokens = bu.input_tokens or 0
275-
completion_tokens = bu.output_tokens or 0
276-
search_units = bu.search_units or 0
277-
is_search_model: bool = model in COHERE_SEARCH_MODELS
278-
cost = CohereClientWrapper._compute_cost(prompt_tokens, completion_tokens, search_units, is_search_model, model)
279-
return prompt_tokens, completion_tokens, cost
280-
281-
@staticmethod
282-
def _get_output(result: Optional[NonStreamedChatResponse | RerankResponse]) -> str:
283-
if not result:
284-
return ""
285-
286-
if isinstance(result, RerankResponse):
287-
output = CohereOutput(documents=CohereClientWrapper._cohere_json_list(result.results) if result.results else None)
288-
return json_dumps(asdict(output))
289-
290-
text = result.text or CohereClientWrapper._cohere_json_list(result.tool_calls)
291-
output = CohereOutput(
292-
text=text,
293-
citations=CohereClientWrapper._cohere_json_list(result.citations) if result.citations else None,
294-
documents=CohereClientWrapper._cohere_json_list(result.documents) if result.documents else None,
295-
search_queries=CohereClientWrapper._cohere_json_list(result.search_queries) if result.search_queries else None,
296-
search_results=CohereClientWrapper._cohere_json_list(result.search_results) if result.search_results else None,
297-
)
298-
return json_dumps(asdict(output))
299-
300-
@staticmethod
301-
def _cohere_json_list(obj: Any) -> str:
302-
return json_dumps([o.dict() for o in obj])
303-
304220
@staticmethod
305221
def init(client: Union[cohere.Client, cohere.AsyncClient]) -> None:
306222
"""

0 commit comments

Comments
 (0)