diff --git a/README.md b/README.md
index 6aef45fd..f2de5404 100644
--- a/README.md
+++ b/README.md
@@ -250,13 +250,12 @@ If the problem isn't self-evident, you can also reach into the `.last_turn()`, w
 
-
-
-Also, since `chatlas` builds on top of packages like `anthropic` and `openai`, you can also enable their debug logging to get even more detailed information about what's going on under the hood:
+For monitoring issues in a production (or otherwise non-interactive) environment, you may want to enable logging. Also, since `chatlas` builds on top of packages like `anthropic` and `openai`, you can enable their debug logging to get lower-level information, like HTTP requests and response codes.
 
 ```shell
-$ export ANTHROPIC_LOG=info
+$ export CHATLAS_LOG=info
 $ export OPENAI_LOG=info
+$ export ANTHROPIC_LOG=info
 ```
 
 ### Next steps
diff --git a/chatlas/_anthropic.py b/chatlas/_anthropic.py
index c551a754..bdb6cbb6 100644
--- a/chatlas/_anthropic.py
+++ b/chatlas/_anthropic.py
@@ -16,11 +16,11 @@
     ContentToolRequest,
     ContentToolResult,
 )
+from ._logging import log_model_default
 from ._provider import Provider
 from ._tokens import tokens_log
 from ._tools import Tool, basemodel_to_param_schema
 from ._turn import Turn, normalize_turns
-from ._utils import log_model_default
 
 if TYPE_CHECKING:
     from anthropic.types import (
diff --git a/chatlas/_chat.py b/chatlas/_chat.py
index 2f1b9982..dfbb9f2b 100644
--- a/chatlas/_chat.py
+++ b/chatlas/_chat.py
@@ -34,6 +34,7 @@
     MarkdownDisplay,
     MockMarkdownDisplay,
 )
+from ._logging import log_tool_error
 from ._provider import Provider
 from ._tools import Tool
 from ._turn import Turn, user_turn
@@ -1035,6 +1036,7 @@ def _invoke_tool(
             return ContentToolResult(id_, result, None)
         except Exception as e:
+            log_tool_error(func.__name__, str(arguments), e)
             return ContentToolResult(id_, None, str(e))
 
     @staticmethod
@@ -1054,6 +1056,7 @@ async def _invoke_tool_async(
             return ContentToolResult(id_, result, None)
         except Exception as e:
+            log_tool_error(func.__name__, str(arguments), e)
             return ContentToolResult(id_, None, str(e))
 
     def _markdown_display(
diff --git a/chatlas/_display.py b/chatlas/_display.py
index c76ed424..33bc5bbd 100644
--- a/chatlas/_display.py
+++ b/chatlas/_display.py
@@ -1,9 +1,12 @@
+import logging
 from abc import ABC, abstractmethod
 from typing import Any
 from uuid import uuid4
 
 from rich.live import Live
+from rich.logging import RichHandler
 
+from ._logging import logger
 from ._typing_extensions import TypedDict
 
 
@@ -64,6 +67,15 @@ def update(self, content: str):
 
     def __enter__(self):
         self.live.__enter__()
+        # Live() isn't smart enough to automatically display logs when
+        # they get handled while Live() is active.
+        # However, if the logging handler is a RichHandler, it can be told
+        # about the live console so it can add logs to the top of the Live console.
+        handlers = [*logging.getLogger().handlers, *logger.handlers]
+        for h in handlers:
+            if isinstance(h, RichHandler):
+                h.console = self.live.console
+
         return self
 
     def __exit__(self, exc_type, exc_value, traceback):
diff --git a/chatlas/_github.py b/chatlas/_github.py
index 25fa8005..b97c7285 100644
--- a/chatlas/_github.py
+++ b/chatlas/_github.py
@@ -4,9 +4,10 @@
 from typing import TYPE_CHECKING, Optional
 
 from ._chat import Chat
+from ._logging import log_model_default
 from ._openai import ChatOpenAI
 from ._turn import Turn
-from ._utils import MISSING, MISSING_TYPE, log_model_default
+from ._utils import MISSING, MISSING_TYPE
 
 if TYPE_CHECKING:
     from ._openai import ChatCompletion
diff --git a/chatlas/_google.py b/chatlas/_google.py
index c7874dcb..f4e96d86 100644
--- a/chatlas/_google.py
+++ b/chatlas/_google.py
@@ -15,10 +15,10 @@
     ContentToolRequest,
     ContentToolResult,
 )
+from ._logging import log_model_default
 from ._provider import Provider
 from ._tools import Tool, basemodel_to_param_schema
 from ._turn import Turn, normalize_turns
-from ._utils import log_model_default
 
 if TYPE_CHECKING:
     from google.generativeai.types.content_types import (
diff --git a/chatlas/_groq.py b/chatlas/_groq.py
index 7f2e2a60..9e4b99c1 100644
--- a/chatlas/_groq.py
+++ b/chatlas/_groq.py
@@ -4,9 +4,10 @@
 from typing import TYPE_CHECKING, Optional
 
 from ._chat import Chat
+from ._logging import log_model_default
 from ._openai import ChatOpenAI
 from ._turn import Turn
-from ._utils import MISSING, MISSING_TYPE, log_model_default
+from ._utils import MISSING, MISSING_TYPE
 
 if TYPE_CHECKING:
     from ._openai import ChatCompletion
diff --git a/chatlas/_logging.py b/chatlas/_logging.py
new file mode 100644
index 00000000..214c9dcd
--- /dev/null
+++ b/chatlas/_logging.py
@@ -0,0 +1,61 @@
+import logging
+import os
+import warnings
+
+from rich.logging import RichHandler
+
+
+def _rich_handler() -> RichHandler:
+    formatter = logging.Formatter("%(name)s - %(message)s")
+    handler = RichHandler()
+    handler.setFormatter(formatter)
+    return handler
+
+
+logger = logging.getLogger("chatlas")
+
+if os.environ.get("CHATLAS_LOG") == "info":
+    # By adding a RichHandler to chatlas' logger, we can guarantee that its
+    # logs never get dropped, even if the root logger's handlers are not
+    # RichHandlers.
+    logger.setLevel(logging.INFO)
+    logger.addHandler(_rich_handler())
+    logger.propagate = False
+
+    # Add a RichHandler to the root logger if there are no handlers. Note that
+    # if chatlas is imported before other libraries that set up logging (like
+    # openai, anthropic, or httpx), this will ensure that logs from those
+    # libraries are also displayed in the rich console.
+    root = logging.getLogger()
+    if not root.handlers:
+        root.addHandler(_rich_handler())
+
+    # Warn if there are non-RichHandler handlers on the root logger.
+    # TODO: we could consider something a bit more abusive here, like removing
+    # non-RichHandler handlers from the root logger, but that could be
+    # surprising to users.
+    bad_handlers = [
+        h.get_name() for h in root.handlers if not isinstance(h, RichHandler)
+    ]
+    if len(bad_handlers) > 0:
+        warnings.warn(
+            "When setting up logging handlers for CHATLAS_LOG, chatlas detected "
+            f"non-rich handler(s) on the root logger named {bad_handlers}. "
+            "As a result, logs handled by those handlers may be dropped when the "
+            "`echo` argument of `.chat()`, `.stream()`, etc., is something "
+            "other than 'none'. This problem can likely be fixed by importing "
+            "`chatlas` before other libraries that set up logging, or by adding a "
+            "RichHandler to the root logger before loading other libraries.",
+        )
+
+
+def log_model_default(model: str) -> str:
+    logger.info(f"Defaulting to `model = '{model}'`.")
+    return model
+
+
+def log_tool_error(name: str, arguments: str, e: Exception):
+    logger.info(
+        f"Error invoking tool function '{name}' with arguments: {arguments}. "
+        f"The error message is: '{e}'",
+    )
diff --git a/chatlas/_openai.py b/chatlas/_openai.py
index cda71b5f..3a8b5f45 100644
--- a/chatlas/_openai.py
+++ b/chatlas/_openai.py
@@ -1,7 +1,6 @@
 from __future__ import annotations
 
 import json
-import warnings
 from typing import TYPE_CHECKING, Any, Literal, Optional, cast, overload
 
 from pydantic import BaseModel
@@ -16,12 +15,13 @@
     ContentToolRequest,
     ContentToolResult,
 )
+from ._logging import log_model_default
 from ._merge import merge_dicts
 from ._provider import Provider
 from ._tokens import tokens_log
 from ._tools import Tool, basemodel_to_param_schema
 from ._turn import Turn, normalize_turns
-from ._utils import MISSING, MISSING_TYPE, is_testing, log_model_default
+from ._utils import MISSING, MISSING_TYPE, is_testing
 
 if TYPE_CHECKING:
     from openai.types.chat import (
@@ -473,11 +473,12 @@ def _as_turn(
             try:
                 args = json.loads(func.arguments) if func.arguments else {}
             except json.JSONDecodeError:
-                warnings.warn(
+                raise ValueError(
                     f"The model's completion included a tool request ({func.name}) "
-                    "with invalid JSON for input arguments: '{func.arguments}'",
-                    InvalidJSONParameterWarning,
-                    stacklevel=2,
+                    f"with invalid JSON for input arguments: '{func.arguments}'. "
+                    "This can happen if the model hallucinates parameters not defined by "
+                    "your function schema. Try revising your tool description and system "
+                    "prompt to be more specific about the expected input arguments to this function."
                 )
 
             contents.append(
diff --git a/chatlas/_perplexity.py b/chatlas/_perplexity.py
index 7aa13032..04aef104 100644
--- a/chatlas/_perplexity.py
+++ b/chatlas/_perplexity.py
@@ -4,9 +4,10 @@
 from typing import TYPE_CHECKING, Optional
 
 from ._chat import Chat
+from ._logging import log_model_default
 from ._openai import ChatOpenAI
 from ._turn import Turn
-from ._utils import MISSING, MISSING_TYPE, log_model_default
+from ._utils import MISSING, MISSING_TYPE
 
 if TYPE_CHECKING:
     from ._openai import ChatCompletion
diff --git a/chatlas/_utils.py b/chatlas/_utils.py
index a60c9372..f3d184ec 100644
--- a/chatlas/_utils.py
+++ b/chatlas/_utils.py
@@ -2,15 +2,12 @@
 
 import functools
 import inspect
-import logging
 import os
 import re
 from typing import Awaitable, Callable, TypeVar, cast
 
 from ._typing_extensions import ParamSpec, TypeGuard
 
-logger = logging.getLogger("chatlas")
-
 # --------------------------------------------------------------------
 # wrap_async() and is_async_callable() was copied from shiny/_utils.py
 # --------------------------------------------------------------------
@@ -80,11 +77,6 @@ class MISSING_TYPE:
 
 MISSING = MISSING_TYPE()
 
-def log_model_default(model: str) -> str:
-    logger.info(f"Defaulting to `model = '{model}'`.")
-    return model
-
-
 # --------------------------------------------------------------------
 # html_escape was copied from htmltools/_utils.py
 # --------------------------------------------------------------------
diff --git a/docs/tool-calling.qmd b/docs/tool-calling.qmd
index 2f430975..35225912 100644
--- a/docs/tool-calling.qmd
+++ b/docs/tool-calling.qmd
@@ -20,57 +20,58 @@ from chatlas import ChatOpenAI
 
 ### Motivating example
 
-Let's take a look at an example where we really need an external tool. Chat models generally do not know the current time, which makes questions like these impossible.
+Let's take a look at an example where we really need an external tool. Chat models generally do not have access to "real-time" information, such as current events, weather, etc. Let's see what happens when we ask the chat model about the weather in a specific location:
 
 ```{python}
-chat = ChatOpenAI(model="gpt-4o")
-_ = chat.chat("How long ago exactly was the moment Neil Armstrong touched down on the moon?")
+chat = ChatOpenAI(model="gpt-4o-mini")
+_ = chat.chat("What's the weather like today in Duluth, MN?")
 ```
+
 
-Unfortunately, the LLM doesn't hallucinates the current date. Let's give the chat model the ability to determine the current time and try again.
+Fortunately, the model is smart enough to know that it doesn't have access to real-time information, and it doesn't try to make up an answer. However, we can help it out by providing a tool that can fetch the weather for a given location.
 
 ### Defining a tool function
 
-The first thing we'll do is define a Python function that returns the current time. This will be our tool.
+As it turns out, LLMs are pretty good at figuring out 'structure' like latitude and longitude from 'unstructured' things like a location name. So we can write a tool function that takes a latitude and longitude and returns the current temperature at that location. Here's an example of how you might write such a function using the [Open-Meteo API](https://open-meteo.com/):
 
 ```{python}
-def get_current_time(tz: str = "UTC") -> str:
+import requests
+
+def get_current_temperature(latitude: float, longitude: float):
     """
-    Gets the current time in the given time zone.
+    Get the current weather given a latitude and longitude.
 
     Parameters
     ----------
-    tz
-        The time zone to get the current time in. Defaults to "UTC".
-
-    Returns
-    -------
-    str
-        The current time in the given time zone.
+    latitude
+        The latitude of the location.
+    longitude
+        The longitude of the location.
     """
-    from datetime import datetime
-    from zoneinfo import ZoneInfo
-
-    return datetime.now(ZoneInfo(tz)).strftime("%Y-%m-%d %H:%M:%S %Z")
+    lat_lng = f"latitude={latitude}&longitude={longitude}"
+    url = f"https://api.open-meteo.com/v1/forecast?{lat_lng}&current=temperature_2m,wind_speed_10m&hourly=temperature_2m,relative_humidity_2m,wind_speed_10m"
+    response = requests.get(url)
+    json = response.json()
+    return json["current"]
 ```
 
 Note that we've gone through the trouble of adding the following to our function:
 
-- Type hints for arguments and the return value
-- A docstring that explains what the function does and what arguments it expects
+- Type hints for function arguments
+- A docstring that explains what the function does and what arguments it expects (as well as descriptions for the arguments themselves)
 
-**Providing these hints and context is very important**, as it helps the chat model understand how to use your tool correctly!
+**Providing these hints and documentation is very important**, as it helps the chat model understand how to use your tool correctly!
 
 Let's test it:
 
 ```{python}
-get_current_time()
+get_current_temperature(46.7867, -92.1005)
 ```
 
 ### Using the tool
 
-In order for the LLM to make use of our tool, we need to register it with the chat object. This is done by calling the `register_tool` method on the chat object.
+In order for the LLM to make use of our tool, we need to register it with the `chat` object. This is done by calling the `register_tool` method on the chat object.
 
 ```{python}
-chat.register_tool(get_current_time)
+chat.register_tool(get_current_temperature)
@@ -79,12 +80,30 @@ chat.register_tool(get_current_time)
 
 Now let's retry our original question:
 
 ```{python}
-_ = chat.chat("How long ago exactly was the moment Neil Armstrong touched down on the moon?")
+_ = chat.chat("What's the weather like today in Duluth, MN?")
 ```
 
 That's correct! Without any further guidance, the chat model decided to call our tool function and successfully used its result in formulating its response.
 
-This tool example was extremely simple, but you can imagine doing much more interesting things from tool functions: calling APIs, reading from or writing to a database, kicking off a complex simulation, or even calling a complementary GenAI model (like an image generator). Or if you are using chatlas in a Shiny app, you could use tools to set reactive values, setting off a chain of reactive updates.
+This tool example was extremely simple, but you can imagine doing much more interesting things from tool functions: calling APIs, reading from or writing to a database, kicking off a complex simulation, or even calling a complementary GenAI model (like an image generator). Or if you are using chatlas in a Shiny app, you could use tools to set reactive values, setting off a chain of reactive updates. This is precisely what the [sidebot dashboard](https://github.com/jcheng5/py-sidebot) does to allow for an AI-assisted "drill-down" into the data.
+
+### Trouble-shooting
+
+When the execution of a tool function fails, chatlas sends the exception message back to the chat model. This can be useful, since it gives the model a chance to handle the error gracefully. However, it can also lead to confusion as to why a response did not come back as expected. If you encounter such a situation, you can set `echo="all"` in the `chat.chat()` method to see the full conversation, including tool calls and their results.
+
+```{python}
+def get_current_temperature(latitude: float, longitude: float):
+    "Get the current weather given a latitude and longitude."
+    raise ValueError("Failed to get current temperature")
+
+chat.tools = [get_current_temperature]
+
+_ = chat.chat("What's the weather like today in Duluth, MN?")
+```
+
+
+
+
 
 ### Tool limitations