vamplabAI
diff --git a/‎.cursor/rules/core-modules.mdc‎
Lines changed: 0 additions & 1 deletion b/‎.cursor/rules/core-modules.mdc‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎.webui_secret_key‎
Lines changed: 1 addition & 0 deletions b/‎.webui_secret_key‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎docs/en/sgr-api/SGR-Agent's-Workflow.md‎
Lines changed: 9 additions & 5 deletions b/‎docs/en/sgr-api/SGR-Agent's-Workflow.md‎
Lines changed: 9 additions & 5 deletions
diff --git a/‎docs/en/sgr-api/SGR-Description-API.md‎
Lines changed: 23 additions & 7 deletions b/‎docs/en/sgr-api/SGR-Description-API.md‎
Lines changed: 23 additions & 7 deletions
diff --git a/‎docs/ru/sgr-api/SGR-Agent's-Workflow.md‎
Lines changed: 9 additions & 5 deletions b/‎docs/ru/sgr-api/SGR-Agent's-Workflow.md‎
Lines changed: 9 additions & 5 deletions
diff --git a/‎docs/ru/sgr-api/SGR-Description-API.md‎
Lines changed: 23 additions & 7 deletions b/‎docs/ru/sgr-api/SGR-Description-API.md‎
Lines changed: 23 additions & 7 deletions
diff --git a/‎sgr_agent_core/base_agent.py‎
Lines changed: 19 additions & 4 deletions b/‎sgr_agent_core/base_agent.py‎
Lines changed: 19 additions & 4 deletions
diff --git a/‎sgr_agent_core/server/endpoints.py‎
Lines changed: 19 additions & 27 deletions b/‎sgr_agent_core/server/endpoints.py‎
Lines changed: 19 additions & 27 deletions
@@ -216,7 +216,6 @@ alwaysApply: true
 - `ChatCompletionRequest`: OpenAI-compatible request model
 - `MessagesList`: Root model for message lists with base64 truncation
 - `AgentStateResponse`: Agent state response model
-- `ClarificationRequest`: Clarification request model
 - `HealthResponse`: Health check response
 
 ### FastAPI Application (`sgr_agent_core/server/app.py`)
 
@@ -0,0 +1 @@
+UZhigVonEnhHdYTr
@@ -32,12 +32,16 @@ sequenceDiagram
             Note over Agent: State: WAITING_FOR_CLARIFICATION
             Agent->>Tools: Execute clarification tool
             Tools->>API: Return clarifying questions
-            API-->>Client: Stream clarification questions
-
-            Client->>API: POST /v1/chat/completions<br/>{"model": "agent_id", "messages": [...]}
-            API->>Agent: provide_clarification()
+            API-->>Client: Stream clarification questions with<br/>agent_id embedded in content
+
+            alt Stateless mode (full-context client)
+                Client->>API: POST /v1/chat/completions<br/>{"model": "sgr_agent", "messages": [<br/>  ..., "agent {id} started", ...]}<br/>Agent ID detected inside messages
+                API->>Agent: provide_clarification(replace=True)<br/>Conversation fully replaced
+            else Stateful mode (delta client)
+                Client->>API: POST /v1/chat/completions<br/>{"model": "agent_id", "messages": [new replies]}
+                API->>Agent: provide_clarification(replace=False)<br/>Messages appended to conversation
+            end
             Note over Agent: State: RESEARCHING
-            Agent->>Agent: Add clarification to context
 
         else Tool: GeneratePlan
             Agent->>Tools: Execute plan generation
 
@@ -117,9 +117,18 @@ Create a chat completion for research tasks. This is the main endpoint for inter
 - `max_tokens` (integer, optional, default: 1500): Maximum number of tokens for generation
 - `temperature` (float, optional, default: 0): Generation temperature (0.0-1.0). Lower values make output more deterministic
 
-**Special Behavior - Clarification Requests:**
+**Special Behavior - Resuming an Agent in Clarification State:**
 
-If `model` contains an agent ID (format: `{agent_name}_{uuid}`) and the agent is in `waiting_for_clarification` state, this endpoint will automatically route to the clarification handler instead of creating a new agent.
+This endpoint supports two ways to resume an agent that is waiting for clarification:
+
+| Mode | Trigger | Conversation handling |
+|---|---|---|
+| **Stateless (full-context)** | Agent ID found anywhere inside `messages` text | Agent's conversation is **replaced entirely** with the incoming `messages` |
+| **Stateful (delta)** | Agent ID passed as the `model` field value | Incoming `messages` are **appended** to the existing conversation |
+
+Use the **stateless mode** when integrating with a standard OpenAI-compatible chat UI that re-sends the full message history on every request. The server detects the agent ID in the message content, replaces the agent's conversation with the incoming `messages`, and resumes execution.
+
+Use the **stateful mode** when your client tracks context itself and only sends new messages as a delta. Pass the agent ID (format: `{agent_name}_{uuid}`) as the `model` field value.
 
 **Response:**
 
@@ -321,7 +330,10 @@ curl http://localhost:8010/agents/sgr_agent_12345-67890-abcdef/state
 
 ## POST `/agents/{agent_id}/provide_clarification`
 
-Provide clarification to an agent that is waiting for input. Resumes agent execution after receiving clarification messages.
+Provide clarification to an agent that is waiting for input. Resumes agent execution after receiving clarification messages. This endpoint operates in **stateful (delta) mode**: the provided messages are *appended* to the agent's existing conversation history.
+
+!!! tip "Alternative via `/v1/chat/completions`"
+    If you are using a standard OpenAI-compatible client that re-sends the full message history on each turn, prefer the **stateless mode** of `POST /v1/chat/completions`: make sure the agent ID appears somewhere in the message text (the agent itself emits an `Agent {agent_id} started` message at startup) and send the full context as `messages`. The server will detect the ID, replace the agent's conversation with the incoming snapshot, and resume execution.
 
 **Path Parameters:**
 
@@ -342,7 +354,7 @@ Provide clarification to an agent that is waiting for input. Resumes agent execu
 
 **Request Parameters:**
 
-- `messages` (array, required): Clarification messages in OpenAI format (ChatCompletionMessageParam). Can contain multiple messages for complex clarifications.
+- `messages` (array, required): New clarification messages in OpenAI format (ChatCompletionMessageParam). These are appended to the existing conversation — send only the new user replies, not the full history.
 
 **Request:**
 
@@ -365,7 +377,7 @@ curl -X POST "http://localhost:8010/agents/sgr_agent_12345-67890-abcdef/provide_
 
 **Streaming Response:**
 
-Returns streaming response (SSE format) with continued research after clarification. The agent resumes execution from the point where it requested clarification.
+Returns a streaming SSE response with continued research after clarification. The agent resumes execution from the point where it requested clarification.
 
 **Error Responses:**
 
@@ -375,15 +387,19 @@ Returns streaming response (SSE format) with continued research after clarificat
     "detail": "Agent not found"
   }
   ```
+- `400 Bad Request`: Agent is not in `waiting_for_clarification` state
+  ```json
+  {
+    "detail": "Agent is not waiting for clarification"
+  }
+  ```
 - `500 Internal Server Error`: Error during clarification processing
   ```json
   {
     "detail": "Error message"
   }
   ```
 
-**Note:** This endpoint can also be accessed via POST `/v1/chat/completions` by using the agent ID as the `model` parameter when the agent is in `waiting_for_clarification` state.
-
 ## DELETE `/agents/{agent_id}`
 
 Cancel a running agent's execution and remove it from storage. If the agent is currently running, it will be cancelled first before removal.
 
@@ -32,12 +32,16 @@ sequenceDiagram
             Note over Agent: Состояние: WAITING_FOR_CLARIFICATION
             Agent->>Tools: Выполнить инструмент уточнения
             Tools->>API: Вернуть уточняющие вопросы
-            API-->>Client: Поток уточняющих вопросов
-
-            Client->>API: POST /v1/chat/completions<br/>{"model": "agent_id", "messages": [...]}
-            API->>Agent: provide_clarification()
+            API-->>Client: Поток уточняющих вопросов<br/>(содержит agent_id в тексте)
+
+            alt Режим stateless (клиент шлёт полный контекст)
+                Client->>API: POST /v1/chat/completions<br/>{"model": "sgr_agent", "messages": [<br/>  ..., "agent {id} started", ...]}<br/>ID агента найден внутри messages
+                API->>Agent: provide_clarification(replace=True)<br/>Разговор полностью заменяется
+            else Режим stateful (клиент шлёт дельту)
+                Client->>API: POST /v1/chat/completions<br/>{"model": "agent_id", "messages": [новые ответы]}
+                API->>Agent: provide_clarification(replace=False)<br/>Сообщения дописываются к разговору
+            end
             Note over Agent: Состояние: RESEARCHING
-            Agent->>Agent: Добавить уточнение в контекст
 
         else Инструмент: GeneratePlan
             Agent->>Tools: Выполнить генерацию плана
 
@@ -117,9 +117,18 @@ curl http://localhost:8010/v1/models
 - `max_tokens` (integer, опциональный, по умолчанию: 1500): Максимальное количество токенов для генерации
 - `temperature` (float, опциональный, по умолчанию: 0): Температура генерации (0.0-1.0). Меньшие значения делают вывод более детерминированным
 
-**Особое поведение - Запросы на уточнение:**
+**Особое поведение — Возобновление агента в состоянии ожидания уточнения:**
 
-Если `model` содержит ID агента (формат: `{agent_name}_{uuid}`) и агент находится в состоянии `waiting_for_clarification`, этот endpoint автоматически перенаправит запрос на обработчик уточнений вместо создания нового агента.
+Endpoint поддерживает два способа возобновить агент, находящийся в состоянии `waiting_for_clarification`:
+
+| Режим | Триггер | Обработка разговора |
+|---|---|---|
+| **Stateless (полный контекст)** | ID агента обнаружен где-либо в тексте `messages` | Разговор агента **полностью заменяется** входящими `messages` |
+| **Stateful (дельта)** | ID агента передан в поле `model` | Входящие `messages` **дописываются** к существующему разговору |
+
+Используйте **режим stateless**, когда интегрируетесь через стандартный OpenAI-совместимый клиент, который каждый раз пересылает полную историю сообщений. Сервер автоматически обнаружит ID агента в тексте сообщений (сообщение вида `Agent {agent_id} started` добавляется агентом в самом начале работы), заменит разговор агента входящими `messages` и возобновит выполнение.
+
+Используйте **режим stateful**, когда клиент сам управляет контекстом и отправляет только новые сообщения-дополнения. Передайте ID агента (формат: `{agent_name}_{uuid}`) в поле `model`.
 
 **Ответ:**
 
@@ -321,7 +330,10 @@ curl http://localhost:8010/agents/sgr_agent_12345-67890-abcdef/state
 
 ## POST `/agents/{agent_id}/provide_clarification`
 
-Предоставить уточнение агенту, который ожидает ввода. Возобновляет выполнение агента после получения сообщений уточнения.
+Предоставить уточнение агенту, который ожидает ввода. Возобновляет выполнение агента после получения сообщений уточнения. Endpoint работает в **stateful (дельта) режиме**: переданные сообщения *дописываются* к существующей истории разговора агента.
+
+!!! tip "Альтернатива через `/v1/chat/completions`"
+    Если вы используете стандартный OpenAI-совместимый клиент, который пересылает полную историю сообщений при каждом запросе, предпочтите **режим stateless** endpoint `POST /v1/chat/completions`: убедитесь, что ID агента присутствует в тексте сообщений (агент сам добавляет `Agent {agent_id} started` в начале работы), и передайте полный контекст в `messages`. Сервер обнаружит ID, заменит разговор агента новым снимком и возобновит выполнение.
 
 **Параметры пути:**
 
@@ -342,7 +354,7 @@ curl http://localhost:8010/agents/sgr_agent_12345-67890-abcdef/state
 
 **Параметры запроса:**
 
-- `messages` (array, обязательный): Сообщения уточнения в формате OpenAI (ChatCompletionMessageParam). Может содержать несколько сообщений для сложных уточнений.
+- `messages` (array, обязательный): Новые сообщения уточнения в формате OpenAI (ChatCompletionMessageParam). Дописываются к существующему разговору — передавайте только новые реплики пользователя, а не полную историю.
 
 **Запрос:**
 
@@ -365,7 +377,7 @@ curl -X POST "http://localhost:8010/agents/sgr_agent_12345-67890-abcdef/provide_
 
 **Потоковый ответ:**
 
-Возвращает потоковый ответ (формат SSE) с продолжением исследования после уточнения. Агент возобновляет выполнение с точки, где он запросил уточнение.
+Возвращает потоковый SSE-ответ с продолжением исследования после уточнения. Агент возобновляет выполнение с точки, где он запросил уточнение.
 
 **Ошибки:**
 
@@ -375,15 +387,19 @@ curl -X POST "http://localhost:8010/agents/sgr_agent_12345-67890-abcdef/provide_
     "detail": "Agent not found"
   }
   ```
+- `400 Bad Request`: Агент не находится в состоянии `waiting_for_clarification`
+  ```json
+  {
+    "detail": "Agent is not waiting for clarification"
+  }
+  ```
 - `500 Internal Server Error`: Ошибка при обработке уточнения
   ```json
   {
     "detail": "Сообщение об ошибке"
   }
   ```
 
-**Примечание:** Этот endpoint также доступен через POST `/v1/chat/completions`, используя ID агента в качестве параметра `model`, когда агент находится в состоянии `waiting_for_clarification`.
-
 ## DELETE `/agents/{agent_id}`
 
 Отменить выполнение запущенного агента и удалить его из хранилища. Если агент в данный момент выполняется, он будет сначала отменен, а затем удален.
 
@@ -80,9 +80,22 @@ def get_tool_config(self, tool_class: Type[BaseTool]) -> BaseModel | dict[str, A
         base = getattr(self.config, base_attr, None) if base_attr and self.config else None
         return config_from_kwargs(config_model, base, raw)
 
-    async def provide_clarification(self, messages: list[ChatCompletionMessageParam]):
-        """Receive clarification from an external source (e.g. user input) in
-        OpenAI messages format."""
+    async def provide_clarification(
+        self,
+        messages: list[ChatCompletionMessageParam],
+        replace_conversation: bool = False,
+    ) -> None:
+        """Receive clarification from an external source in OpenAI messages
+        format.
+
+        Args:
+            messages: Clarification messages in OpenAI ChatCompletionMessageParam format.
+            replace_conversation: When True, clear the existing conversation
+                before adding messages, so the history is rebuilt from the incoming list.
+                Use this for stateless clients that re-send the full history on every turn.
+        """
+        if replace_conversation:
+            self.conversation = []
         self.conversation.extend(messages)
         self.conversation.append(
             {"role": "user", "content": PromptLoader.get_clarification_template(messages, self.config.prompts)}
@@ -262,8 +275,10 @@ async def _execute(self):
         This method contains the main agent execution logic. It is
         called by execute() which wraps it in an asyncio task.
         """
-        print("start messages: ", self.task_messages)
         self.logger.info(f"🚀 User provided {len(self.task_messages)} messages.")
+        init_message = f"Agent {self.id} started\n"
+        self.conversation.append({"role": "system", "content": init_message})
+        self.streaming_generator.add_content_delta(init_message, "0-start")
         try:
             while self._context.state not in AgentStatesEnum.FINISH_STATES.value:
                 self._context.iteration += 1
 
@@ -12,9 +12,10 @@
     AgentListResponse,
     AgentStateResponse,
     ChatCompletionRequest,
-    ClarificationRequest,
     HealthResponse,
+    MessagesRequest,
 )
+from sgr_agent_core.utils import is_agent_id
 
 logger = logging.getLogger(__name__)
 
@@ -150,15 +151,17 @@ async def get_available_models():
 
 
 @router.post("/agents/{agent_id}/provide_clarification")
-async def provide_clarification(agent_id: str, request: ClarificationRequest):
-    try:
-        agent = agents_storage.get(agent_id)
-        if not agent:
-            raise HTTPException(status_code=404, detail="Agent not found")
-
-        logger.info(f"Providing clarification to agent {agent.id}: {len(request.messages)} messages")
+async def provide_clarification(
+    request: MessagesRequest,
+    agent_id: str,
+) -> StreamingResponse:
+    messages = list(request.messages.root)
+    agent = agents_storage.get(agent_id)
+    if not agent:
+        raise HTTPException(status_code=404, detail="Agent not found")
 
-        await agent.provide_clarification(request.messages)
+    try:
+        await agent.provide_clarification(messages, replace_conversation=request.agent_id_from_messages is not None)
         return StreamingResponse(
             agent.streaming_generator.stream(),
             media_type="text/event-stream",
@@ -168,35 +171,24 @@ async def provide_clarification(agent_id: str, request: ClarificationRequest):
                 "X-Agent-ID": str(agent.id),
             },
         )
-
     except Exception as e:
         logger.error(f"Error completion: {e}")
         raise HTTPException(status_code=500, detail=str(e))
 
 
-def _is_agent_id(model_str: str) -> bool:
-    """Check if the model string is an agent ID (contains underscore and UUID-
-    like format)."""
-    return "_" in model_str and len(model_str) > 20
-
-
 @router.post("/v1/chat/completions")
 async def create_chat_completion(request: ChatCompletionRequest):
     if not request.stream:
         raise HTTPException(status_code=501, detail="Only streaming responses are supported. Set 'stream=true'")
 
-    # Check if this is a clarification request for an existing agent
+    agent_id = request.agent_id_from_messages or (request.model if is_agent_id(request.model) else None)
     if (
-        request.model
-        and isinstance(request.model, str)
-        and _is_agent_id(request.model)
-        and request.model in agents_storage
-        and agents_storage[request.model]._context.state == AgentStatesEnum.WAITING_FOR_CLARIFICATION
+        agent_id is not None
+        and agent_id in agents_storage
+        and agents_storage[agent_id]._context.state == AgentStatesEnum.WAITING_FOR_CLARIFICATION
     ):
-        return await provide_clarification(
-            agent_id=request.model,
-            request=ClarificationRequest(messages=request.messages.root),
-        )
+        response = await provide_clarification(request, agent_id=agent_id)
+        return response
 
     try:
         agent_def = next(filter(lambda ad: ad.name == request.model, AgentFactory.get_definitions_list()), None)
@@ -210,7 +202,7 @@ async def create_chat_completion(request: ChatCompletionRequest):
         logger.info(f"Created agent '{request.model}' with {len(request.messages)} messages")
 
         agents_storage[agent.id] = agent
-        asyncio.create_task(agent.execute())  # Starts execution, task stored in agent._execute_task
+        asyncio.create_task(agent.execute())
         return StreamingResponse(
             agent.streaming_generator.stream(),
             media_type="text/event-stream",