
Commit 4a28977

Merge remote-tracking branch 'origin/main' into tests-isolation
Signed-off-by: elronbandel <[email protected]>
2 parents 6a406e6 + 77d2f08

File tree: 18 files changed, +275 -115 lines

docs/examples/agents/react.py

Lines changed: 1 addition & 1 deletion
@@ -103,7 +103,7 @@ def react(
     react_toolbox: ReactToolbox,
 ):
     assert m.ctx.is_chat_context, "ReACT requires a chat context."
-    test_ctx_lin = m.ctx.linearize()
+    test_ctx_lin = m.ctx.render_for_generation()
     assert test_ctx_lin is not None and len(test_ctx_lin) == 0, (
         "ReACT expects a fresh context."
     )

docs/examples/agents/react_instruct.py

Lines changed: 1 addition & 1 deletion
@@ -101,7 +101,7 @@ def react(
     react_toolbox: ReactToolbox,
 ):
     assert m.ctx.is_chat_context, "ReACT requires a chat context."
-    test_ctx_lin = m.ctx.linearize()
+    test_ctx_lin = m.ctx.render_for_generation()
     assert test_ctx_lin is not None and len(test_ctx_lin) == 0, (
         "ReACT expects a fresh context."
     )

docs/tutorial.md

Lines changed: 10 additions & 8 deletions
@@ -968,15 +968,15 @@ Let's look at how this agent is implemented in Mellea:
 ```python
 # file: https://github.com/generative-computing/mellea/blob/main/docs/examples/agents/react.py#L99
 def react(
-        m: mellea.MelleaSession,
-        goal: str,
-        react_toolbox: ReactToolbox,
-        budget: int=5,
+    m: mellea.MelleaSession,
+    goal: str,
+    react_toolbox: ReactToolbox,
+    budget: int = 5,
 ):
     assert m.ctx.is_chat_context, "ReACT requires a chat context."
-    test_ctx_lin = m.ctx.linearize()
+    test_ctx_lin = m.ctx.render_for_generation()
     assert (
-            test_ctx_lin is not None and len(test_ctx_lin) == 0
+        test_ctx_lin is not None and len(test_ctx_lin) == 0
     ), "ReACT expects a fresh context."
 
     # Construct the system prompt for ReACT.

@@ -1006,15 +1006,17 @@ def react(
         # model_options={mellea.backends.types.ModelOption.TOOLS: react_toolbox.tools_dict()},
         format=react_toolbox.tool_name_schema(),
     )
-    selected_tool: ReactTool = react_toolbox.get_tool_from_schema(act.content)
+    selected_tool: ReactTool = react_toolbox.get_tool_from_schema(
+        act.content)
     print(selected_tool.get_name())
 
     print(f"### Arguments for action")
     act_args = m.chat(
         "Choose arguments for the tool. Respond using JSON and include only the tool arguments in your response.",
         format=selected_tool.args_schema(),
     )
-    print(f"```json\n{json.dumps(json.loads(act_args.content), indent=2)}\n```")
+    print(
+        f"```json\n{json.dumps(json.loads(act_args.content), indent=2)}\n```")
 
     # TODO: handle exceptions.
     print("### Observation")

mellea/backends/formatter.py

Lines changed: 9 additions & 9 deletions
@@ -5,6 +5,7 @@
 import re
 import sys
 from collections.abc import Iterable, Mapping
+from dataclasses import fields
 from typing import Any
 
 import jinja2

@@ -166,7 +167,7 @@ def print_context(self, ctx: Context) -> str:
         )
         match ctx:
             case LinearContext():
-                linearized_ctx = ctx.linearize()
+                linearized_ctx = ctx.render_for_generation()
                 assert linearized_ctx is not None
                 return "".join([self.print(x) for x in linearized_ctx])
             case SimpleContext():

@@ -396,14 +397,13 @@ def _get_model_id(self) -> str:
                 "model_id was neither a `str` nor `ModelIdentifier`"
             )
 
-        # Go through the ModelIdentifier's fields, find one that isn't `"None"` or `""`.
-        ids = [model_id.hf_model_name, model_id.ollama_name]
-        model_id = ""
-        for val in ids:
-            if val != "None" and val != "":
-                model_id = val  # type: ignore
-                break
-        return model_id
+        # Go through the ModelIdentifier's fields, find one that can be matched against.
+        for field in fields(model_id):
+            val = getattr(model_id, field.name)
+            if val is not None and val != "":
+                return val
+
+        return ""  # Cannot match against any model identifiers. Will ultimately use default.
 
 
 def _simplify_model_string(input: str) -> str:
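
The `_get_model_id` rewrite swaps a hand-maintained list of identifier attributes for iteration over the dataclass's declared fields, so a newly added backend name on `ModelIdentifier` is picked up without touching this method. A minimal self-contained sketch of the pattern (the two fields shown are illustrative, not the library's full definition):

```python
from dataclasses import dataclass, fields


@dataclass
class ModelIdentifier:
    # Illustrative fields only; the real class may declare more backends.
    hf_model_name: str | None = None
    ollama_name: str | None = None


def first_usable_id(model_id: ModelIdentifier) -> str:
    # Scan declared fields in order; return the first non-empty value.
    for field in fields(model_id):
        val = getattr(model_id, field.name)
        if val is not None and val != "":
            return val
    return ""  # No usable identifier; the caller falls back to a default.


print(first_usable_id(ModelIdentifier(ollama_name="granite3.3:8b")))  # granite3.3:8b
```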

mellea/backends/huggingface.py

Lines changed: 11 additions & 8 deletions
@@ -12,12 +12,11 @@
 import json
 import os
 from collections.abc import Callable
-from typing import Any, Optional
+from typing import TYPE_CHECKING, Any, Optional
 
 import outlines
 import outlines_core
 import torch
-from alora.peft_model_alora import aLoRAPeftModelForCausalLM  # type: ignore
 from transformers import (
     AutoModelForCausalLM,
     AutoTokenizer,

@@ -26,7 +25,6 @@
     PreTrainedTokenizer,
     set_seed,
 )
-from transformers.generation import GenerateDecoderOnlyOutput
 
 from mellea.backends import BaseModelSubclass
 from mellea.backends.aloras import Alora, AloraBackendMixin

@@ -52,6 +50,9 @@
 from mellea.stdlib.chat import Message
 from mellea.stdlib.requirement import ALoraRequirement, LLMaJRequirement, Requirement
 
+if TYPE_CHECKING:
+    from alora.peft_model_alora import aLoRAPeftModelForCausalLM  # type: ignore
+
 assert outlines, "outlines needs to be present to make outlines_core work"
 
 """A configuration type for the unhappy path: Tokenizer * Model * torch device string

@@ -160,17 +161,17 @@ def __init__(
         self._cache = cache if cache is not None else SimpleLRUCache(3)
 
         # Used when running aLoRAs with this backend.
-        self._alora_model: aLoRAPeftModelForCausalLM | None = None
+        self._alora_model: "aLoRAPeftModelForCausalLM | None" = None  # noqa: UP037
         # ALoras that have been loaded for this model.
         self._aloras: dict[str, HFAlora] = {}
 
     @property
-    def alora_model(self) -> aLoRAPeftModelForCausalLM | None:
+    def alora_model(self) -> "aLoRAPeftModelForCausalLM | None":  # noqa: UP037
         """The ALora model."""
         return self._alora_model
 
     @alora_model.setter
-    def alora_model(self, model: aLoRAPeftModelForCausalLM | None):
+    def alora_model(self, model: "aLoRAPeftModelForCausalLM | None"):  # noqa: UP037
         """Sets the ALora model. This should only happen once in a backend's lifetime."""
         assert self._alora_model is None
         self._alora_model = model

@@ -239,7 +240,7 @@ def _generate_from_context_alora(
             "This code block should not execute unless there is a 'constraint' alora loaded."
         )
         # Construct the linearized context. This is very similar to normal generation.
-        linearized_ctx = ctx.linearize()
+        linearized_ctx = ctx.render_for_generation()
         assert linearized_ctx is not None and len(linearized_ctx) > 1
         msgs = self.formatter.to_chat_messages(linearized_ctx)
         user_message, assistant_message = msgs[-2].content, msgs[-1].content

@@ -286,7 +287,7 @@ def _generate_from_context_standard(
         # Otherwise, we will linearize the context and treat it as a raw input.
         decoded_result: str | None = None
         if ctx.is_chat_context:
-            linearized_ctx = ctx.linearize()
+            linearized_ctx = ctx.render_for_generation()
             assert linearized_ctx is not None, (
                 "If ctx.is_chat_context, then the context should be linearizable."
             )

@@ -624,6 +625,8 @@ def add_alora(self, alora: HFAlora):
         Args:
             alora (str): identifier for the ALora adapter
         """
+        from alora.peft_model_alora import aLoRAPeftModelForCausalLM  # type: ignore
+
         assert issubclass(alora.__class__, HFAlora), (
             f"cannot add an ALora of type {alora.__class__} to model; must inherit from {HFAlora.__class__}"
         )
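
Both this file and `mellea/backends/openai.py` below defer a heavy import the same way: the module-level import moves under `typing.TYPE_CHECKING`, where only static type checkers evaluate it, annotations become quoted strings, and the runtime import happens lazily inside the one method that needs it. A minimal sketch of the pattern, using the standard library's `sqlite3` as a stand-in for the optional dependency:

```python
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Evaluated only by static type checkers; no import cost at runtime.
    from sqlite3 import Connection


class Store:
    def __init__(self) -> None:
        # The quoted annotation names the type without importing it.
        self._conn: "Connection | None" = None

    def open(self) -> None:
        # Deferred import: the dependency loads only when first used.
        import sqlite3

        self._conn = sqlite3.connect(":memory:")
```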

mellea/backends/ollama.py

Lines changed: 1 addition & 1 deletion
@@ -263,7 +263,7 @@ def generate_from_chat_context(
         """
         model_opts = self._simplify_and_merge(model_options)
 
-        linearized_context = ctx.linearize()
+        linearized_context = ctx.render_for_generation()
         assert linearized_context is not None, (
             "Cannot generate from a non-linear context in a FormatterBackend."
         )

mellea/backends/openai.py

Lines changed: 9 additions & 7 deletions
@@ -6,19 +6,16 @@
 import json
 from collections.abc import Callable
 from enum import Enum
-from typing import Any
+from typing import TYPE_CHECKING, Any
 from urllib.parse import urlparse
 
 import openai
 import requests
 from huggingface_hub import snapshot_download
 from openai.types.chat import ChatCompletion
 from openai.types.completion import Completion
-from transformers import AutoTokenizer
-from transformers.tokenization_utils import PreTrainedTokenizer
 
 import mellea.backends.model_ids as model_ids
-from cli.serve.models import ChatCompletionMessage
 from mellea.backends import BaseModelSubclass
 from mellea.backends.aloras import Alora, AloraBackendMixin
 from mellea.backends.formatter import Formatter, FormatterBackend, TemplateFormatter

@@ -38,6 +35,9 @@
 from mellea.stdlib.chat import Message
 from mellea.stdlib.requirement import ALoraRequirement, LLMaJRequirement, Requirement
 
+if TYPE_CHECKING:
+    from transformers.tokenization_utils import PreTrainedTokenizer
+
 openai_ollama_batching_error = "json: cannot unmarshal array into Go struct field CompletionRequest.prompt of type string"

@@ -328,7 +328,7 @@ def _generate_from_chat_context_alora(
         )
 
         # Construct the linearized context. This is very similar to normal generation.
-        linearized_ctx = ctx.linearize()
+        linearized_ctx = ctx.render_for_generation()
         assert linearized_ctx is not None and len(linearized_ctx) > 1
         msgs = self.formatter.to_chat_messages(linearized_ctx)
         user_message, assistant_message = msgs[-2].content, msgs[-1].content

@@ -363,7 +363,7 @@ def _generate_from_chat_context_standard(
         model_opts = self._simplify_and_merge(
             model_options, is_chat_context=ctx.is_chat_context
         )
-        linearized_context = ctx.linearize()
+        linearized_context = ctx.render_for_generation()
         assert linearized_context is not None, (
             "Cannot generate from a non-linear context in a FormatterBackend."
         )

@@ -639,10 +639,12 @@ def get_aloras(self) -> list[Alora]:
 
     def apply_chat_template(self, chat: list[dict[str, str]]):
         """Apply the chat template for the model, if such a model is available (e.g., when it can deduce the huggingface model id)."""
+        from transformers import AutoTokenizer
+
         if not hasattr(self, "_tokenizer"):
             match _server_type(self._base_url):
                 case _ServerType.LOCALHOST:
-                    self._tokenizer: PreTrainedTokenizer = (
+                    self._tokenizer: "PreTrainedTokenizer" = (  # noqa: UP037
                         AutoTokenizer.from_pretrained(self._hf_model_id)
                     )
                 case _ServerType.OPENAI:

mellea/backends/watsonx.py

Lines changed: 1 addition & 1 deletion
@@ -220,7 +220,7 @@ def generate_from_chat_context(
             model_options, is_chat_context=ctx.is_chat_context
         )
 
-        linearized_context = ctx.linearize()
+        linearized_context = ctx.render_for_generation()
         assert linearized_context is not None, (
             "Cannot generate from a non-linear context in a FormatterBackend."
         )

mellea/stdlib/base.py

Lines changed: 14 additions & 5 deletions
@@ -157,8 +157,13 @@ def _hash_for_kv_cache(self):
         ...
 
     @abc.abstractmethod
-    def linearize(self) -> list[Component | CBlock] | None:
-        """Provides a linear list of context components. This is not always possible, or None if that is not possible to construct."""
+    def render_for_generation(self) -> list[Component | CBlock] | None:
+        """Provides a linear list of context components to use for generation, or None if that is not possible to construct."""
+        ...
+
+    @abc.abstractmethod
+    def full_event_log(self) -> list[Component | CBlock]:
+        """Provides a list of all events stored in the context."""
         ...
 
     @abc.abstractmethod

@@ -262,6 +267,10 @@ def last_output_and_logs(
         )
         return last, log[0]
 
+    def full_event_log(self) -> list[Component | CBlock]:
+        """Returns the underlying _ctx."""
+        return self._ctx
+
     def last_turn(self):
         """The last input/output turn of the context."""
         if len(self._ctx) == 0:

@@ -335,8 +344,8 @@ def insert_turn(
         if turn.output:
             self.insert(turn.output, generate_logs=generate_logs)
 
-    def linearize(self) -> list[Component | CBlock] | None:
-        """Returns the underlying _ctx list."""
+    def render_for_generation(self) -> list[Component | CBlock] | None:
+        """Returns the underlying _ctx list for generation."""
         return self._ctx
 
     def is_chat_history(self):

@@ -372,7 +381,7 @@ def __init__(self):
         super().__init__()
         self.is_chat_context = True
 
-    def linearize(self) -> list[Component | CBlock] | None:
+    def render_for_generation(self) -> list[Component | CBlock] | None:
         """Uses _ctx ordering."""
         return []
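
This is the substantive API change behind the rename: `linearize()` splits into `render_for_generation()`, the view a backend should feed to the model (possibly pruned, or `None` for non-linear contexts), and `full_event_log()`, everything the context has recorded. For `LinearContext` the two coincide, but a custom context could diverge. A hypothetical sketch, with plain strings standing in for `Component | CBlock`:

```python
class WindowedContext:
    """Hypothetical context: records every event, renders only a window."""

    def __init__(self, window: int = 4):
        self._ctx: list[str] = []
        self.window = window

    def insert(self, event: str) -> None:
        self._ctx.append(event)

    def render_for_generation(self) -> list[str] | None:
        # What the backend sees: only the trailing window of events.
        return self._ctx[-self.window:]

    def full_event_log(self) -> list[str]:
        # Everything ever recorded, e.g. for casting to a chat history.
        return self._ctx


ctx = WindowedContext(window=2)
for turn in ["sys", "user1", "asst1", "user2"]:
    ctx.insert(turn)
print(ctx.render_for_generation())  # ['asst1', 'user2']
print(ctx.full_event_log())         # ['sys', 'user1', 'asst1', 'user2']
```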

mellea/stdlib/chat.py

Lines changed: 3 additions & 3 deletions
@@ -111,10 +111,10 @@ def _to_msg(c: CBlock | Component | ModelOutputThunk) -> Message | None:
         case _:
             return None
 
-    linearized_ctx = ctx.linearize()
-    if linearized_ctx is None:
+    all_ctx_events = ctx.full_event_log()
+    if all_ctx_events is None:
         raise Exception("Trying to cast a non-linear history into a chat history.")
     else:
-        history = [_to_msg(c) for c in linearized_ctx]
+        history = [_to_msg(c) for c in all_ctx_events]
         assert None not in history, "Could not render this context as a chat history."
         return history  # type: ignore
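
Casting to a chat history accordingly switches from the generation view to the full event log, so the cast sees every message even when `render_for_generation()` prunes events or returns `None`. Reusing the hypothetical `WindowedContext` from the sketch above:

```python
def as_chat_history(ctx: WindowedContext) -> list[str]:
    # Build the history from everything recorded, not from the
    # pruned view that generation sees.
    return list(ctx.full_event_log())


ctx = WindowedContext(window=2)
for turn in ["sys", "user1", "asst1", "user2"]:
    ctx.insert(turn)
print(as_chat_history(ctx))         # all four events
print(ctx.render_for_generation())  # only ['asst1', 'user2']
```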
