diff --git a/libs/langchain_v1/langchain/agents/middleware/summarization.py b/libs/langchain_v1/langchain/agents/middleware/summarization.py index 6ba6221206ca6..400df0e72e769 100644 --- a/libs/langchain_v1/langchain/agents/middleware/summarization.py +++ b/libs/langchain_v1/langchain/agents/middleware/summarization.py @@ -1,8 +1,9 @@ """Summarization middleware.""" import uuid -from collections.abc import Callable, Iterable -from typing import Any, cast +import warnings +from collections.abc import Callable, Iterable, Mapping +from typing import Any, Literal, cast from langchain_core.messages import ( AIMessage, @@ -51,13 +52,17 @@ {messages} """ # noqa: E501 -SUMMARY_PREFIX = "## Previous conversation summary:" - _DEFAULT_MESSAGES_TO_KEEP = 20 _DEFAULT_TRIM_TOKEN_LIMIT = 4000 _DEFAULT_FALLBACK_MESSAGE_COUNT = 15 _SEARCH_RANGE_FOR_TOOL_PAIRS = 5 +ContextFraction = tuple[Literal["fraction"], float] +ContextTokens = tuple[Literal["tokens"], int] +ContextMessages = tuple[Literal["messages"], int] + +ContextSize = ContextFraction | ContextTokens | ContextMessages + class SummarizationMiddleware(AgentMiddleware): """Summarizes conversation history when token limits are approached. @@ -70,34 +75,86 @@ class SummarizationMiddleware(AgentMiddleware): def __init__( self, model: str | BaseChatModel, - max_tokens_before_summary: int | None = None, - messages_to_keep: int = _DEFAULT_MESSAGES_TO_KEEP, + *, + trigger: ContextSize | list[ContextSize] | None = None, + keep: ContextSize = ("messages", _DEFAULT_MESSAGES_TO_KEEP), token_counter: TokenCounter = count_tokens_approximately, summary_prompt: str = DEFAULT_SUMMARY_PROMPT, - summary_prefix: str = SUMMARY_PREFIX, + trim_tokens_to_summarize: int | None = _DEFAULT_TRIM_TOKEN_LIMIT, + **deprecated_kwargs: Any, ) -> None: """Initialize the summarization middleware. Args: model: The language model to use for generating summaries. - max_tokens_before_summary: Token threshold to trigger summarization. - If `None`, summarization is disabled. - messages_to_keep: Number of recent messages to preserve after summarization. + trigger: One or more thresholds that trigger summarization. Provide a single + `ContextSize` tuple or a list of tuples, in which case summarization runs + when any threshold is breached. Examples: `("messages", 50)`, `("tokens", 3000)`, + `[("fraction", 0.8), ("messages", 100)]`. + keep: Context retention policy applied after summarization. Provide a + `ContextSize` tuple to specify how much history to preserve. Defaults to + keeping the most recent 20 messages. Examples: `("messages", 20)`, + `("tokens", 3000)`, or `("fraction", 0.3)`. token_counter: Function to count tokens in messages. summary_prompt: Prompt template for generating summaries. - summary_prefix: Prefix added to system message when including summary. + trim_tokens_to_summarize: Maximum tokens to keep when preparing messages for the + summarization call. Pass `None` to skip trimming entirely. """ + # Handle deprecated parameters + if "max_tokens_before_summary" in deprecated_kwargs: + value = deprecated_kwargs["max_tokens_before_summary"] + warnings.warn( + "max_tokens_before_summary is deprecated. Use trigger=('tokens', value) instead.", + DeprecationWarning, + stacklevel=2, + ) + if trigger is None and value is not None: + trigger = ("tokens", value) + + if "messages_to_keep" in deprecated_kwargs: + value = deprecated_kwargs["messages_to_keep"] + warnings.warn( + "messages_to_keep is deprecated. 
Use keep=('messages', value) instead.", + DeprecationWarning, + stacklevel=2, + ) + if keep == ("messages", _DEFAULT_MESSAGES_TO_KEEP): + keep = ("messages", value) + super().__init__() if isinstance(model, str): model = init_chat_model(model) self.model = model - self.max_tokens_before_summary = max_tokens_before_summary - self.messages_to_keep = messages_to_keep + if trigger is None: + self.trigger: ContextSize | list[ContextSize] | None = None + trigger_conditions: list[ContextSize] = [] + elif isinstance(trigger, list): + validated_list = [self._validate_context_size(item, "trigger") for item in trigger] + self.trigger = validated_list + trigger_conditions = validated_list + else: + validated = self._validate_context_size(trigger, "trigger") + self.trigger = validated + trigger_conditions = [validated] + self._trigger_conditions = trigger_conditions + + self.keep = self._validate_context_size(keep, "keep") self.token_counter = token_counter self.summary_prompt = summary_prompt - self.summary_prefix = summary_prefix + self.trim_tokens_to_summarize = trim_tokens_to_summarize + + requires_profile = any(condition[0] == "fraction" for condition in self._trigger_conditions) + if self.keep[0] == "fraction": + requires_profile = True + if requires_profile and self._get_profile_limits() is None: + msg = ( + "Model profile information is required to use fractional token limits. " + 'pip install "langchain[model-profiles]" or use absolute token counts ' + "instead." + ) + raise ValueError(msg) def before_model(self, state: AgentState, runtime: Runtime) -> dict[str, Any] | None: # noqa: ARG002 """Process messages before model invocation, potentially triggering summarization.""" @@ -105,13 +162,10 @@ def before_model(self, state: AgentState, runtime: Runtime) -> dict[str, Any] | self._ensure_message_ids(messages) total_tokens = self.token_counter(messages) - if ( - self.max_tokens_before_summary is not None - and total_tokens < self.max_tokens_before_summary - ): + if not self._should_summarize(messages, total_tokens): return None - cutoff_index = self._find_safe_cutoff(messages) + cutoff_index = self._determine_cutoff_index(messages) if cutoff_index <= 0: return None @@ -129,6 +183,124 @@ def before_model(self, state: AgentState, runtime: Runtime) -> dict[str, Any] | ] } + def _should_summarize(self, messages: list[AnyMessage], total_tokens: int) -> bool: + """Determine whether summarization should run for the current token usage.""" + if not self._trigger_conditions: + return False + + for kind, value in self._trigger_conditions: + if kind == "messages" and len(messages) >= value: + return True + if kind == "tokens" and total_tokens >= value: + return True + if kind == "fraction": + max_input_tokens = self._get_profile_limits() + if max_input_tokens is None: + continue + threshold = int(max_input_tokens * value) + if threshold <= 0: + threshold = 1 + if total_tokens >= threshold: + return True + return False + + def _determine_cutoff_index(self, messages: list[AnyMessage]) -> int: + """Choose cutoff index respecting retention configuration.""" + kind, value = self.keep + if kind in {"tokens", "fraction"}: + token_based_cutoff = self._find_token_based_cutoff(messages) + if token_based_cutoff is not None: + return token_based_cutoff + # None cutoff -> model profile data not available (caught in __init__ but + # here for safety), fallback to message count + return self._find_safe_cutoff(messages, _DEFAULT_MESSAGES_TO_KEEP) + return self._find_safe_cutoff(messages, cast("int", value)) + + def 
_find_token_based_cutoff(self, messages: list[AnyMessage]) -> int | None: + """Find cutoff index based on target token retention.""" + if not messages: + return 0 + + kind, value = self.keep + if kind == "fraction": + max_input_tokens = self._get_profile_limits() + if max_input_tokens is None: + return None + target_token_count = int(max_input_tokens * value) + elif kind == "tokens": + target_token_count = int(value) + else: + return None + + if target_token_count <= 0: + target_token_count = 1 + + if self.token_counter(messages) <= target_token_count: + return 0 + + # Use binary search to identify the earliest message index that keeps the + # suffix within the token budget. + left, right = 0, len(messages) + cutoff_candidate = len(messages) + max_iterations = len(messages).bit_length() + 1 + for _ in range(max_iterations): + if left >= right: + break + + mid = (left + right) // 2 + if self.token_counter(messages[mid:]) <= target_token_count: + cutoff_candidate = mid + right = mid + else: + left = mid + 1 + + if cutoff_candidate == len(messages): + cutoff_candidate = left + + if cutoff_candidate >= len(messages): + if len(messages) == 1: + return 0 + cutoff_candidate = len(messages) - 1 + + for i in range(cutoff_candidate, -1, -1): + if self._is_safe_cutoff_point(messages, i): + return i + + return 0 + + def _get_profile_limits(self) -> int | None: + """Retrieve max input token limit from the model profile.""" + try: + profile = self.model.profile + except (AttributeError, ImportError): + return None + + if not isinstance(profile, Mapping): + return None + + max_input_tokens = profile.get("max_input_tokens") + + if not isinstance(max_input_tokens, int): + return None + + return max_input_tokens + + def _validate_context_size(self, context: ContextSize, parameter_name: str) -> ContextSize: + """Validate context configuration tuples.""" + kind, value = context + if kind == "fraction": + if not 0 < value <= 1: + msg = f"Fractional {parameter_name} values must be between 0 and 1, got {value}." + raise ValueError(msg) + elif kind in {"tokens", "messages"}: + if value <= 0: + msg = f"{parameter_name} thresholds must be greater than 0, got {value}." + raise ValueError(msg) + else: + msg = f"Unsupported context size type {kind} for {parameter_name}." + raise ValueError(msg) + return context + def _build_new_messages(self, summary: str) -> list[HumanMessage]: return [ HumanMessage(content=f"Here is a summary of the conversation to date:\n\n{summary}") @@ -151,16 +323,16 @@ def _partition_messages( return messages_to_summarize, preserved_messages - def _find_safe_cutoff(self, messages: list[AnyMessage]) -> int: + def _find_safe_cutoff(self, messages: list[AnyMessage], messages_to_keep: int) -> int: """Find safe cutoff point that preserves AI/Tool message pairs. Returns the index where messages can be safely cut without separating related AI and Tool messages. Returns 0 if no safe cutoff is found. 
""" - if len(messages) <= self.messages_to_keep: + if len(messages) <= messages_to_keep: return 0 - target_cutoff = len(messages) - self.messages_to_keep + target_cutoff = len(messages) - messages_to_keep for i in range(target_cutoff, -1, -1): if self._is_safe_cutoff_point(messages, i): @@ -229,16 +401,18 @@ def _create_summary(self, messages_to_summarize: list[AnyMessage]) -> str: try: response = self.model.invoke(self.summary_prompt.format(messages=trimmed_messages)) - return cast("str", response.content).strip() + return response.text.strip() except Exception as e: # noqa: BLE001 return f"Error generating summary: {e!s}" def _trim_messages_for_summary(self, messages: list[AnyMessage]) -> list[AnyMessage]: """Trim messages to fit within summary generation limits.""" try: + if self.trim_tokens_to_summarize is None: + return messages return trim_messages( messages, - max_tokens=_DEFAULT_TRIM_TOKEN_LIMIT, + max_tokens=self.trim_tokens_to_summarize, token_counter=self.token_counter, start_on="human", strategy="last", diff --git a/libs/langchain_v1/tests/unit_tests/agents/test_middleware_agent.py b/libs/langchain_v1/tests/unit_tests/agents/test_middleware_agent.py index 02fa96e6b65af..64045e6fa9e7e 100644 --- a/libs/langchain_v1/tests/unit_tests/agents/test_middleware_agent.py +++ b/libs/langchain_v1/tests/unit_tests/agents/test_middleware_agent.py @@ -1,7 +1,7 @@ import warnings from collections.abc import Awaitable, Callable from types import ModuleType -from typing import Any +from typing import Any, TYPE_CHECKING from unittest.mock import patch import sys @@ -10,6 +10,7 @@ from langchain_core.language_models.chat_models import BaseChatModel from langchain_core.messages import ( AIMessage, + AnyMessage, HumanMessage, RemoveMessage, ToolCall, @@ -64,6 +65,9 @@ from .messages import _AnyIdHumanMessage, _AnyIdToolMessage from .model import FakeToolCallingModel +if TYPE_CHECKING: + from langchain_model_profiles import ModelProfile + def test_create_agent_diagram( snapshot: SnapshotAssertion, @@ -1030,17 +1034,19 @@ def test_summarization_middleware_initialization() -> None: model = FakeToolCallingModel() middleware = SummarizationMiddleware( model=model, - max_tokens_before_summary=1000, - messages_to_keep=10, + trigger=("tokens", 1000), + keep=("messages", 10), summary_prompt="Custom prompt: {messages}", - summary_prefix="Custom prefix:", ) assert middleware.model == model - assert middleware.max_tokens_before_summary == 1000 - assert middleware.messages_to_keep == 10 + assert middleware.trigger == ("tokens", 1000) + assert middleware.keep == ("messages", 10) assert middleware.summary_prompt == "Custom prompt: {messages}" - assert middleware.summary_prefix == "Custom prefix:" + assert middleware.trim_tokens_to_summarize == 4000 + + with pytest.raises(ValueError): + SummarizationMiddleware(model=model, keep=("fraction", 0.5)) # no model profile # Test with string model with patch( @@ -1054,10 +1060,10 @@ def test_summarization_middleware_initialization() -> None: def test_summarization_middleware_no_summarization_cases() -> None: """Test SummarizationMiddleware when summarization is not needed or disabled.""" model = FakeToolCallingModel() - middleware = SummarizationMiddleware(model=model, max_tokens_before_summary=1000) + middleware = SummarizationMiddleware(model=model, trigger=("tokens", 1000)) # Test when summarization is disabled - middleware_disabled = SummarizationMiddleware(model=model, max_tokens_before_summary=None) + middleware_disabled = SummarizationMiddleware(model=model, 
trigger=None) state = {"messages": [HumanMessage(content="Hello"), AIMessage(content="Hi")]} result = middleware_disabled.before_model(state, None) assert result is None @@ -1074,7 +1080,7 @@ def mock_token_counter(messages): def test_summarization_middleware_helper_methods() -> None: """Test SummarizationMiddleware helper methods.""" model = FakeToolCallingModel() - middleware = SummarizationMiddleware(model=model, max_tokens_before_summary=1000) + middleware = SummarizationMiddleware(model=model, trigger=("tokens", 1000)) # Test message ID assignment messages = [HumanMessage(content="Hello"), AIMessage(content="Hi")] @@ -1121,7 +1127,7 @@ def test_summarization_middleware_tool_call_safety() -> None: """Test SummarizationMiddleware tool call safety logic.""" model = FakeToolCallingModel() middleware = SummarizationMiddleware( - model=model, max_tokens_before_summary=1000, messages_to_keep=3 + model=model, trigger=("tokens", 1000), keep=("messages", 3) ) # Test safe cutoff point detection with tool calls @@ -1159,7 +1165,7 @@ def _generate(self, messages, **kwargs): def _llm_type(self): return "mock" - middleware = SummarizationMiddleware(model=MockModel(), max_tokens_before_summary=1000) + middleware = SummarizationMiddleware(model=MockModel(), trigger=("tokens", 1000)) # Test normal summary creation messages = [HumanMessage(content="Hello"), AIMessage(content="Hi")] @@ -1182,10 +1188,246 @@ def _generate(self, messages, **kwargs): def _llm_type(self): return "mock" - middleware_error = SummarizationMiddleware(model=ErrorModel(), max_tokens_before_summary=1000) + middleware_error = SummarizationMiddleware(model=ErrorModel(), trigger=("tokens", 1000)) summary = middleware_error._create_summary(messages) assert "Error generating summary: Model error" in summary + # Test we raise warning if max_tokens_before_summary or messages_to_keep is specified + with pytest.warns(DeprecationWarning, match="max_tokens_before_summary is deprecated"): + SummarizationMiddleware(model=MockModel(), max_tokens_before_summary=500) + with pytest.warns(DeprecationWarning, match="messages_to_keep is deprecated"): + SummarizationMiddleware(model=MockModel(), messages_to_keep=5) + + +def test_summarization_middleware_trim_limit_none_keeps_all_messages() -> None: + """Verify disabling trim limit preserves full message sequence.""" + + class MockModel(BaseChatModel): + def _generate(self, messages, **kwargs): + return ChatResult(generations=[ChatGeneration(message=AIMessage(content="Summary"))]) + + @property + def _llm_type(self): + return "mock" + + messages = [HumanMessage(content=str(i)) for i in range(10)] + middleware = SummarizationMiddleware( + model=MockModel(), + trim_tokens_to_summarize=None, + ) + middleware.token_counter = lambda msgs: len(msgs) + + trimmed = middleware._trim_messages_for_summary(messages) + assert trimmed is messages + + +def test_summarization_middleware_profile_inference_triggers_summary() -> None: + """Ensure automatic profile inference triggers summarization when limits are exceeded.""" + + class ProfileModel(BaseChatModel): + def _generate(self, messages, **kwargs): + return ChatResult(generations=[ChatGeneration(message=AIMessage(content="Summary"))]) + + @property + def _llm_type(self) -> str: + return "mock" + + @property + def profile(self) -> "ModelProfile": + return {"max_input_tokens": 1000} + + token_counter = lambda messages: len(messages) * 200 + + middleware = SummarizationMiddleware( + model=ProfileModel(), + trigger=("fraction", 0.81), + keep=("fraction", 0.5), + 
token_counter=token_counter, + ) + + state = { + "messages": [ + HumanMessage(content="Message 1"), + AIMessage(content="Message 2"), + HumanMessage(content="Message 3"), + AIMessage(content="Message 4"), + ] + } + + # Test we don't engage summarization + # total_tokens = 4 * 200 = 800 + # trigger threshold = int(1000 * 0.81) = 810 + # 800 < 810 -> summarization not triggered + result = middleware.before_model(state, None) + assert result is None + + # Engage summarization + # trigger threshold = int(1000 * 0.80) = 800; 800 >= 800 + middleware = SummarizationMiddleware( + model=ProfileModel(), + trigger=("fraction", 0.80), + keep=("fraction", 0.5), + token_counter=token_counter, + ) + result = middleware.before_model(state, None) + assert result is not None + assert isinstance(result["messages"][0], RemoveMessage) + summary_message = result["messages"][1] + assert isinstance(summary_message, HumanMessage) + assert summary_message.text.startswith("Here is a summary of the conversation") + assert len(result["messages"][2:]) == 2 # Preserved messages + assert [message.content for message in result["messages"][2:]] == [ + "Message 3", + "Message 4", + ] + + # With keep=("fraction", 0.6) the target token allowance becomes 600, + # so the cutoff shifts to keep the last three messages instead of two. + middleware = SummarizationMiddleware( + model=ProfileModel(), + trigger=("fraction", 0.80), + keep=("fraction", 0.6), + token_counter=token_counter, + ) + result = middleware.before_model(state, None) + assert result is not None + assert [message.content for message in result["messages"][2:]] == [ + "Message 2", + "Message 3", + "Message 4", + ] + + # Once the keep fraction reaches 0.8 the target token allowance (800) covers the full + # conversation, so token-based retention keeps everything + # and summarization is skipped entirely.
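Before the remaining cases, a minimal sketch of the fraction arithmetic these assertions rely on, assuming the 1000-token `max_input_tokens` reported by `ProfileModel`; the helper name is illustrative, not middleware API:

```python
def fraction_threshold(max_input_tokens: int, fraction: float) -> int:
    # Mirrors the trigger/keep math used above: int(limit * fraction), floored at 1.
    return max(int(max_input_tokens * fraction), 1)

assert fraction_threshold(1000, 0.81) == 810  # 800 total tokens -> no summarization
assert fraction_threshold(1000, 0.80) == 800  # 800 total tokens -> summarization triggers
assert fraction_threshold(1000, 0.60) == 600  # keep budget covering the last three 200-token messages
```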
+ middleware = SummarizationMiddleware( + model=ProfileModel(), + trigger=("fraction", 0.80), + keep=("fraction", 0.8), + token_counter=token_counter, + ) + assert middleware.before_model(state, None) is None + + # Test with tokens_to_keep as absolute int value + middleware_int = SummarizationMiddleware( + model=ProfileModel(), + trigger=("fraction", 0.80), + keep=("tokens", 400), # Keep exactly 400 tokens (2 messages) + token_counter=token_counter, + ) + result = middleware_int.before_model(state, None) + assert result is not None + assert [message.content for message in result["messages"][2:]] == [ + "Message 3", + "Message 4", + ] + + # Test with tokens_to_keep as larger int value + middleware_int_large = SummarizationMiddleware( + model=ProfileModel(), + trigger=("fraction", 0.80), + keep=("tokens", 600), # Keep 600 tokens (3 messages) + token_counter=token_counter, + ) + result = middleware_int_large.before_model(state, None) + assert result is not None + assert [message.content for message in result["messages"][2:]] == [ + "Message 2", + "Message 3", + "Message 4", + ] + + +def test_summarization_middleware_token_retention_pct_respects_tool_pairs() -> None: + """Ensure token retention keeps pairs together even if exceeding target tokens.""" + + class ProfileModel(BaseChatModel): + def _generate(self, messages, **kwargs): + return ChatResult(generations=[ChatGeneration(message=AIMessage(content="Summary"))]) + + @property + def _llm_type(self) -> str: + return "mock" + + @property + def profile(self) -> "ModelProfile": + return {"max_input_tokens": 1000} + + def token_counter(messages): + return sum(len(getattr(message, "content", "")) for message in messages) + + middleware = SummarizationMiddleware( + model=ProfileModel(), + trigger=("fraction", 0.1), + keep=("fraction", 0.5), + ) + middleware.token_counter = token_counter + + messages: list[AnyMessage] = [ + HumanMessage(content="H" * 300), + AIMessage( + content="A" * 200, + tool_calls=[{"name": "test", "args": {}, "id": "call-1"}], + ), + ToolMessage(content="T" * 50, tool_call_id="call-1"), + HumanMessage(content="H" * 180), + HumanMessage(content="H" * 160), + ] + + state = {"messages": messages} + result = middleware.before_model(state, None) + assert result is not None + + preserved_messages = result["messages"][2:] + assert preserved_messages == messages[1:] + + target_token_count = int(1000 * 0.5) + preserved_tokens = middleware.token_counter(preserved_messages) + + # Tool pair retention can exceed the target token count but should keep the pair intact. 
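The final assertion below leans on the cutoff-safety rule that an AIMessage carrying tool calls is never separated from its ToolMessage replies. A rough, self-contained illustration of that rule, as an assumption about the behavior rather than the middleware's actual `_is_safe_cutoff_point` code:

```python
from langchain_core.messages import AIMessage, AnyMessage, ToolMessage


def splits_tool_pair(messages: list[AnyMessage], cutoff: int) -> bool:
    """Illustrative check: True if cutting at `cutoff` strands a ToolMessage from its AI call."""
    call_ids = {
        call["id"]
        for message in messages[:cutoff]
        if isinstance(message, AIMessage)
        for call in message.tool_calls
    }
    answered = {
        message.tool_call_id for message in messages[:cutoff] if isinstance(message, ToolMessage)
    }
    return bool(call_ids - answered)


# For the five messages built above, a cutoff of 2 (between the AIMessage and its
# ToolMessage) would strand the tool reply, so the cutoff walks back to 1 and the
# pair stays in the preserved suffix even though that exceeds the token target.
```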
+ assert preserved_tokens > target_token_count + + +def test_summarization_middleware_profile_inference_fallbacks() -> None: + """Ensure automatic profile inference falls back when profiles are unavailable.""" + + class ImportErrorProfileModel(BaseChatModel): + def _generate(self, messages, **kwargs): + raise NotImplementedError + + @property + def _llm_type(self) -> str: + return "mock" + + @property + def profile(self): + raise ImportError("Profile not available") + + class MissingKeysProfileModel(BaseChatModel): + def _generate(self, messages, **kwargs): + raise NotImplementedError + + @property + def _llm_type(self) -> str: + return "mock" + + @property + def profile(self) -> "ModelProfile": + return {"max_input_tokens": 1000} + + models = [ + ImportErrorProfileModel(), + MissingKeysProfileModel(), + ] + + for model in models: + middleware = SummarizationMiddleware(model=model, keep=("messages", 1)) + middleware.token_counter = lambda _messages: 10_000 + state = {"messages": [HumanMessage(content=str(i)) for i in range(3)]} + result = middleware.before_model(state, None) + assert result is None + def test_summarization_middleware_full_workflow() -> None: """Test SummarizationMiddleware complete summarization workflow.""" @@ -1201,9 +1443,11 @@ def _generate(self, messages, **kwargs): def _llm_type(self): return "mock" - middleware = SummarizationMiddleware( - model=MockModel(), max_tokens_before_summary=1000, messages_to_keep=2 - ) + with pytest.warns(DeprecationWarning): + # keep test for functionality + middleware = SummarizationMiddleware( + model=MockModel(), max_tokens_before_summary=1000, messages_to_keep=2 + ) # Mock high token count to trigger summarization def mock_token_counter(messages): @@ -1241,6 +1485,62 @@ def mock_token_counter(messages): assert "Generated summary" in summary_message.content +def test_summarization_middleware_messages_before_summary() -> None: + """Test SummarizationMiddleware with messages_before_summary parameter.""" + + class MockModel(BaseChatModel): + def invoke(self, prompt): + return AIMessage(content="Generated summary") + + def _generate(self, messages, **kwargs): + return ChatResult(generations=[ChatGeneration(message=AIMessage(content="Summary"))]) + + @property + def _llm_type(self): + return "mock" + + # Test that summarization is triggered when message count reaches threshold + middleware = SummarizationMiddleware( + model=MockModel(), trigger=("messages", 5), keep=("messages", 2) + ) + + # Below threshold - no summarization + messages_below = [ + HumanMessage(content="1"), + HumanMessage(content="2"), + HumanMessage(content="3"), + HumanMessage(content="4"), + ] + state_below = {"messages": messages_below} + result = middleware.before_model(state_below, None) + assert result is None + + # At threshold - should trigger summarization + messages_at_threshold = [ + HumanMessage(content="1"), + HumanMessage(content="2"), + HumanMessage(content="3"), + HumanMessage(content="4"), + HumanMessage(content="5"), + ] + state_at = {"messages": messages_at_threshold} + result = middleware.before_model(state_at, None) + assert result is not None + assert "messages" in result + + # Above threshold - should also trigger summarization + messages_above = messages_at_threshold + [HumanMessage(content="6")] + state_above = {"messages": messages_above} + result = middleware.before_model(state_above, None) + assert result is not None + assert "messages" in result + + # Test with both parameters disabled + middleware_disabled = 
SummarizationMiddleware(model=MockModel(), trigger=None) + result = middleware_disabled.before_model(state_above, None) + assert result is None + + def test_on_model_call() -> None: class ModifyMiddleware(AgentMiddleware): def wrap_model_call(
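Taken together, the new keyword-only surface is driven entirely by the `trigger` and `keep` tuples. A minimal usage sketch, with the import path taken from this repo's layout; the model identifier and threshold values are illustrative, and fractional sizes additionally require the `langchain[model-profiles]` extra, as enforced in `__init__`:

```python
from langchain.agents.middleware.summarization import SummarizationMiddleware

middleware = SummarizationMiddleware(
    "openai:gpt-4o-mini",  # any string accepted by init_chat_model, or a BaseChatModel instance
    trigger=[("fraction", 0.8), ("messages", 200)],  # summarize when either threshold is reached
    keep=("tokens", 3000),  # retain roughly the most recent 3000 tokens after summarization
    trim_tokens_to_summarize=None,  # None disables trimming of the history passed to the summary call
)
```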