Skip to content

Commit 558e9c4

Browse files
committed
Add Fixup mechanics for generators, with a new fixup for tool calls carrying multi-modal content in OpenAI
1 parent 1cd42d3 commit 558e9c4

File tree

5 files changed

+170
-13
lines changed

5 files changed

+170
-13
lines changed

rigging/generator/base.py

Lines changed: 88 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
from __future__ import annotations
22

3+
import abc
34
import inspect
45
import typing as t
6+
from dataclasses import dataclass, field
57

68
from loguru import logger
79
from pydantic import BaseModel, BeforeValidator, ConfigDict, Field, field_validator
@@ -21,16 +23,59 @@
2123

2224
CallableT = t.TypeVar("CallableT", bound=t.Callable[..., t.Any])
2325

26+
T = t.TypeVar("T")
27+
2428
# Global provider map
2529

2630

2731
@t.runtime_checkable
2832
class LazyGenerator(t.Protocol):
29-
def __call__(self) -> type[Generator]: ...
33+
def __call__(self) -> type[Generator]:
34+
...
3035

3136

3237
g_providers: dict[str, type[Generator] | LazyGenerator] = {}
3338

39+
# Fixups
40+
41+
42+
class Fixup(abc.ABC):
43+
"""
44+
Base class for fixups that apply to message sequences to correct errors.
45+
"""
46+
47+
@abc.abstractmethod
48+
def can_fix(self, exception: Exception) -> bool:
49+
"""
50+
Check if the fixup can resolve the given exception if made active.
51+
52+
Args:
53+
exception: The exception to be checked.
54+
55+
Returns:
56+
Whether the fixup can handle the exception.
57+
"""
58+
...
59+
60+
@abc.abstractmethod
61+
def fix(self, messages: t.Sequence[Message]) -> t.Sequence[Message]:
62+
"""
63+
Process a sequence of messages to fix them.
64+
65+
Args:
66+
messages: The messages to be fixed.
67+
68+
Returns:
69+
The fixed messages.
70+
"""
71+
...
72+
73+
74+
@dataclass
75+
class Fixups:
76+
available: list[Fixup] = field(default_factory=list)
77+
active: list[Fixup] = field(default_factory=list)
78+
3479

3580
# TODO: We also would like to support N-style
3681
# parallel generation eventually -> need to
@@ -251,6 +296,8 @@ class Generator(BaseModel):
251296
_watch_callbacks: list[WatchCallbacks] = []
252297
_wrap: t.Callable[[CallableT], CallableT] | None = None
253298

299+
_fixups: Fixups = Fixups()
300+
254301
def to_identifier(self, params: GenerateParams | None = None) -> str:
255302
"""
256303
Converts the generator instance back into a rigging identifier string.
@@ -323,6 +370,38 @@ def wrap(self, func: t.Callable[[CallableT], CallableT] | None) -> Self:
323370
self._wrap = func # type: ignore [assignment]
324371
return self
325372

373+
def _check_fixups(self, error: Exception) -> bool:
374+
"""
375+
Check if any available fixup can handle this error.
376+
377+
Args:
378+
error: The error to be checked.
379+
380+
Returns:
381+
Whether a fixup was able to handle the error.
382+
"""
383+
for fixup in self._fixups.available[:]:
384+
if fixup.can_fix(error):
385+
self._fixups.active.append(fixup)
386+
self._fixups.available.remove(fixup)
387+
return True
388+
return False
389+
390+
async def _apply_fixups(self, messages: t.Sequence[Message]) -> t.Sequence[Message]:
391+
"""
392+
Apply all active fixups to the messages.
393+
394+
Args:
395+
messages: The messages to be fixed.
396+
397+
Returns:
398+
The fixed messages.
399+
"""
400+
current_messages = messages
401+
for fixup in self._fixups.active:
402+
current_messages = fixup.fix(current_messages)
403+
return current_messages
404+
326405
async def generate_messages(
327406
self,
328407
messages: t.Sequence[t.Sequence[Message]],
@@ -381,14 +460,16 @@ def chat(
381460
self,
382461
messages: t.Sequence[MessageDict],
383462
params: GenerateParams | None = None,
384-
) -> ChatPipeline: ...
463+
) -> ChatPipeline:
464+
...
385465

386466
@t.overload
387467
def chat(
388468
self,
389469
messages: t.Sequence[Message] | MessageDict | Message | str | None = None,
390470
params: GenerateParams | None = None,
391-
) -> ChatPipeline: ...
471+
) -> ChatPipeline:
472+
...
392473

393474
def chat(
394475
self,
@@ -457,15 +538,17 @@ def chat(
457538
generator: Generator,
458539
messages: t.Sequence[MessageDict],
459540
params: GenerateParams | None = None,
460-
) -> ChatPipeline: ...
541+
) -> ChatPipeline:
542+
...
461543

462544

463545
@t.overload
464546
def chat(
465547
generator: Generator,
466548
messages: t.Sequence[Message] | MessageDict | Message | str | None = None,
467549
params: GenerateParams | None = None,
468-
) -> ChatPipeline: ...
550+
) -> ChatPipeline:
551+
...
469552

470553

471554
def chat(

rigging/generator/litellm_.py

Lines changed: 54 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
from loguru import logger
1010

1111
from rigging.generator.base import (
12+
Fixup,
13+
Fixups,
1214
GeneratedMessage,
1315
GeneratedText,
1416
GenerateParams,
@@ -23,6 +25,42 @@
2325
# fix it to prevent confusion
2426
litellm.drop_params = True
2527

28+
# Prevent the small debug statements
29+
# from being printed to the console
30+
litellm.suppress_debug_info = True
31+
32+
33+
class OpenAIToolsWithImageURLsFixup(Fixup):
34+
# As of writing, openai doesn't support multi-part messages
35+
# associated with the `tool` role. This is complicated by
36+
# the fact that we need to resolve the tool call(s) in the
37+
# following messages. To get around this, we'll resolve the tool
38+
# call with empty content, and duplicate the multi-part data
39+
# into a user message immediately following it. We also need
40+
to take care of multiple tool calls next to each other and ensure
41+
# we don't add the user message in between them.
42+
43+
def can_fix(self, exception: Exception) -> bool:
44+
return (
45+
"Image URLs are only allowed for messages with role 'user', but this message with role 'tool' contains an image URL."
46+
in str(exception)
47+
)
48+
49+
def fix(self, items: t.Sequence[Message]) -> t.Sequence[Message]:
50+
updated_messages: list[Message] = []
51+
append_queue: list[Message] = []
52+
for message in items:
53+
if message.role == "tool" and isinstance(message.all_content, list):
54+
updated_messages.append(message.model_copy(deep=True, update={"all_content": "See next message"}))
55+
append_queue.append(message.model_copy(deep=True, update={"role": "user"}))
56+
else:
57+
updated_messages.extend(append_queue)
58+
append_queue = []
59+
updated_messages.append(message)
60+
61+
updated_messages.extend(append_queue)
62+
return updated_messages
63+
2664

2765
class LiteLLMGenerator(Generator):
2866
"""
@@ -65,6 +103,8 @@ class LiteLLMGenerator(Generator):
65103
_semaphore: asyncio.Semaphore | None = None
66104
_last_request_time: datetime.datetime | None = None
67105

106+
_fixups = Fixups(available=[OpenAIToolsWithImageURLsFixup()])
107+
68108
@property
69109
def semaphore(self) -> asyncio.Semaphore:
70110
if self._semaphore is None:
@@ -155,12 +195,20 @@ async def _generate_message(self, messages: t.Sequence[Message], params: Generat
155195
if self._wrap is not None:
156196
acompletion = self._wrap(acompletion)
157197

158-
response = await acompletion(
159-
model=self.model,
160-
messages=[message.to_openai_spec() for message in messages],
161-
api_key=self.api_key,
162-
**self.params.merge_with(params).to_dict(),
163-
)
198+
# Prepare messages for specific providers
199+
messages = await self._apply_fixups(messages)
200+
201+
try:
202+
response = await acompletion(
203+
model=self.model,
204+
messages=[message.to_openai_spec() for message in messages],
205+
api_key=self.api_key,
206+
**self.params.merge_with(params).to_dict(),
207+
)
208+
except Exception as e:
209+
if self._check_fixups(e):
210+
return await self._generate_message(messages, params)
211+
raise
164212

165213
self._last_request_time = datetime.datetime.now()
166214
return self._parse_model_response(response)

rigging/message.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
from rigging.model import Model, ModelT # noqa: TCH001
3131
from rigging.parsing import try_parse_many
3232
from rigging.tool.api import ToolCall
33+
from rigging.util import truncate_string
3334

3435
Role = t.Literal["system", "user", "assistant", "tool"]
3536
"""The role of a message. Can be 'system', 'user', 'assistant', or 'tool'."""
@@ -84,6 +85,9 @@ class ContentText(BaseModel):
8485
text: str
8586
"""The text content."""
8687

88+
def __str__(self) -> str:
89+
return self.text
90+
8791

8892
class ContentImageUrl(BaseModel):
8993
"""An image URL content part of a message."""
@@ -116,6 +120,9 @@ def from_file(cls, file: Path | str, *, mimetype: str | None = None) -> ContentI
116120

117121
return cls(image_url=cls.ImageUrl(url=url))
118122

123+
def __str__(self) -> str:
124+
return f"<ContentImageUrl '{truncate_string(self.image_url.url, 50)}'>"
125+
119126

120127
Content = t.Union[ContentText, ContentImageUrl]
121128
"""The types of content that can be included in a message."""
@@ -181,9 +188,15 @@ def __init__(
181188
)
182189

183190
def __str__(self) -> str:
184-
formatted = f"[{self.role}]: {self.content}"
191+
formatted = f"[{self.role}]:"
192+
if isinstance(self.all_content, list):
193+
formatted += "\n |- " + "\n |- ".join(str(content) for content in self.all_content)
194+
else:
195+
formatted += f" {self.content}"
196+
185197
for tool_call in self.tool_calls or []:
186198
formatted += f"\n |- {tool_call}"
199+
187200
return formatted
188201

189202
def __len__(self) -> int:

rigging/tool/api.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ class ToolCall(BaseModel):
8080
function: FunctionCall
8181

8282
def __str__(self) -> str:
83-
return f"{self.function.name}({self.function.arguments})"
83+
return f"<ToolCall {self.function.name}({self.function.arguments})>"
8484

8585

8686
class ApiTool:

rigging/util.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,3 +148,16 @@ def get_qualified_name(obj: t.Callable[..., t.Any]) -> str:
148148

149149
# Fallback
150150
return obj.__class__.__qualname__
151+
152+
153+
# Formatting
154+
155+
156+
def truncate_string(content: str, max_length: int, *, sep: str = "...") -> str:
157+
"""Return a string at most max_length characters long."""
158+
if len(content) <= max_length:
159+
return content
160+
161+
remaining = max_length - len(sep)
162+
middle = remaining // 2
163+
return content[:middle] + sep + content[-middle:]

0 commit comments

Comments
 (0)