From cd629d095364de164a0bae9a0a704a2e5216d398 Mon Sep 17 00:00:00 2001
From: "claude[bot]" <209825114+claude[bot]@users.noreply.github.com>
Date: Fri, 21 Nov 2025 22:57:48 +0000
Subject: [PATCH 1/5] feat: Sync SDK with latest OpenAPI specification
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add FinishReason enum (Stop, Length, ToolCalls, ContentFilter, FunctionCall)
- Add ChatCompletionMessageToolCallChunk with index field for streaming
- Add ChatCompletionTokenLogprob, TopLogprob, ChoiceLogprobs for logprobs support
- Add reasoning_content and refusal fields to ChatCompletionStreamDelta
- Add reasoning_content field to Message struct
- Update Model.served_by from Option<Provider> to Provider enum
- Make all Model fields required (object, created, owned_by, served_by)
- Add A2AAgentCard and ListAgentsResponse types for A2A protocol support
- Add list_agents() and get_agent(id) methods to InferenceGatewayAPI trait
- Add NotFound error variant for 404 responses
- Add tests for A2A endpoints

Co-authored-by: Eden Reich

🤖 Generated with [Claude Code](https://claude.com/claude-code)
---
 src/lib.rs | 386 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 369 insertions(+), 17 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index 492ab7a..733d4ce 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -30,6 +30,9 @@ pub enum GatewayError {
     #[error("Forbidden: {0}")]
     Forbidden(String),
 
+    #[error("Not found: {0}")]
+    NotFound(String),
+
     #[error("Bad request: {0}")]
     BadRequest(String),
 
@@ -66,13 +69,13 @@ pub struct Model {
     /// The model identifier
     pub id: String,
     /// The object type, usually "model"
-    pub object: Option<String>,
+    pub object: String,
     /// The Unix timestamp (in seconds) of when the model was created
-    pub created: Option<i64>,
+    pub created: i64,
     /// The organization that owns the model
-    pub owned_by: Option<String>,
+    pub owned_by: String,
     /// The provider that serves the model
-    pub served_by: Option<Provider>,
+    pub served_by: Provider,
 }
 
 /// Response structure for listing models
@@ -110,6 +113,59 @@ pub struct ListToolsResponse {
     pub data: Vec<MCPTool>,
 }
 
+/// An A2A agent card definition
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct A2AAgentCard {
+    /// Unique identifier for the agent (base64-encoded SHA256 hash of the agent URL)
+    pub id: String,
+    /// Human readable name of the agent
+    pub name: String,
+    /// A human-readable description of the agent
+    pub description: String,
+    /// A URL to the address the agent is hosted at
+    pub url: String,
+    /// The version of the agent
+    pub version: String,
+    /// Optional capabilities supported by the agent
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub capabilities: Option<serde_json::Value>,
+    /// The set of interaction modes that the agent supports across all skills
+    #[serde(rename = "defaultInputModes")]
+    pub default_input_modes: Vec<String>,
+    /// Supported media types for output
+    #[serde(rename = "defaultOutputModes")]
+    pub default_output_modes: Vec<String>,
+    /// Skills are a unit of capability that an agent can perform
+    pub skills: Vec<serde_json::Value>,
+    /// A URL to documentation for the agent
+    #[serde(rename = "documentationUrl", skip_serializing_if = "Option::is_none")]
+    pub documentation_url: Option<String>,
+    /// A URL to an icon for the agent
+    #[serde(rename = "iconUrl", skip_serializing_if = "Option::is_none")]
+    pub icon_url: Option<String>,
+    /// The service provider of the agent
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub provider: Option<serde_json::Value>,
+    /// Security requirements for contacting the agent
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub security: Option<Vec<serde_json::Value>>,
+    /// Security scheme details used for authenticating with this agent
+    #[serde(rename = "securitySchemes", skip_serializing_if = "Option::is_none")]
+    pub security_schemes: Option<serde_json::Value>,
+    /// True if the agent supports providing an extended agent card when the user is authenticated
+    #[serde(rename = "supportsAuthenticatedExtendedCard", skip_serializing_if = "Option::is_none")]
+    pub supports_authenticated_extended_card: Option<bool>,
+}
+
+/// Response structure for listing A2A agents
+#[derive(Debug, Serialize, Deserialize)]
+pub struct ListAgentsResponse {
+    /// Response object type, always "list"
+    pub object: String,
+    /// Array of available A2A agents
+    pub data: Vec<A2AAgentCard>,
+}
+
 /// Supported LLM providers
 #[derive(Debug, Serialize, Deserialize, PartialEq, Clone, Copy)]
 #[serde(rename_all = "lowercase")]
@@ -199,7 +255,10 @@ pub struct Message {
     /// Unique identifier of the tool call
     #[serde(skip_serializing_if = "Option::is_none")]
     pub tool_call_id: Option<String>,
-    /// Reasoning behind the message
+    /// The reasoning content of the message
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub reasoning_content: Option<String>,
+    /// The reasoning of the message (same as reasoning_content)
     #[serde(skip_serializing_if = "Option::is_none")]
     pub reasoning: Option<String>,
 }
@@ -279,21 +338,41 @@ struct CreateChatCompletionRequest {
     max_tokens: Option<i32>,
 }
 
-/// A tool call in the response
+/// A tool call chunk in streaming responses
 #[derive(Debug, Deserialize, Clone)]
-pub struct ToolCallResponse {
+pub struct ChatCompletionMessageToolCallChunk {
+    /// Index of the tool call in the array
+    pub index: i32,
     /// Unique identifier of the tool call
-    pub id: String,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub id: Option<String>,
     /// Type of tool that was called
-    #[serde(rename = "type")]
-    pub r#type: ToolType,
+    #[serde(rename = "type", skip_serializing_if = "Option::is_none")]
+    pub r#type: Option<ToolType>,
     /// Function that the LLM wants to call
-    pub function: ChatCompletionMessageToolCallFunction,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub function: Option<ChatCompletionMessageToolCallFunction>,
+}
+
+/// The reason the model stopped generating tokens
+#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
+#[serde(rename_all = "snake_case")]
+pub enum FinishReason {
+    /// Model hit a natural stop point or a provided stop sequence
+    Stop,
+    /// Maximum number of tokens specified in the request was reached
+    Length,
+    /// Model called a tool
+    ToolCalls,
+    /// Content was omitted due to a flag from content filters
+    ContentFilter,
+    /// Function call (deprecated, use tool_calls)
+    FunctionCall,
 }
 
 #[derive(Debug, Deserialize, Clone)]
 pub struct ChatCompletionChoice {
-    pub finish_reason: String,
+    pub finish_reason: FinishReason,
     pub message: Message,
     pub index: i32,
 }
@@ -329,6 +408,39 @@ pub struct CreateChatCompletionStreamResponse {
     pub usage: Option<CompletionUsage>,
 }
 
+/// Token log probability information
+#[derive(Debug, Deserialize, Clone)]
+pub struct ChatCompletionTokenLogprob {
+    /// The token
+    pub token: String,
+    /// The log probability of this token
+    pub logprob: f64,
+    /// UTF-8 bytes representation of the token
+    pub bytes: Option<Vec<i32>>,
+    /// List of the most likely tokens and their log probability
+    pub top_logprobs: Vec<TopLogprob>,
+}
+
+/// Top log probability entry
+#[derive(Debug, Deserialize, Clone)]
+pub struct TopLogprob {
+    /// The token
+    pub token: String,
+    /// The log probability of this token
+    pub logprob: f64,
+    /// UTF-8 bytes representation of the token
+    pub bytes: Option<Vec<i32>>,
+}
+
+/// Log probability information for a choice
+#[derive(Debug, Deserialize, Clone)]
+pub struct ChoiceLogprobs {
+    /// A list of message content tokens with log probability information
+    pub content: Option<Vec<ChatCompletionTokenLogprob>>,
+    /// A list of message refusal tokens with log probability information
+    pub refusal: Option<Vec<ChatCompletionTokenLogprob>>,
+}
+
 /// Choice in a streaming completion response
 #[derive(Debug, Deserialize, Clone)]
 pub struct ChatCompletionStreamChoice {
@@ -338,7 +450,10 @@ pub struct ChatCompletionStreamChoice {
     pub index: i32,
     /// The reason the model stopped generating tokens
     #[serde(skip_serializing_if = "Option::is_none")]
-    pub finish_reason: Option<String>,
+    pub finish_reason: Option<FinishReason>,
+    /// Log probability information for the choice
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub logprobs: Option<ChoiceLogprobs>,
 }
 
 /// Delta content for streaming responses
@@ -350,9 +465,18 @@ pub struct ChatCompletionStreamDelta {
     /// Content of the message delta
     #[serde(skip_serializing_if = "Option::is_none")]
     pub content: Option<String>,
+    /// The reasoning content of the chunk message
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub reasoning_content: Option<String>,
+    /// The reasoning of the chunk message (same as reasoning_content)
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub reasoning: Option<String>,
     /// Tool calls for this delta
     #[serde(skip_serializing_if = "Option::is_none")]
-    pub tool_calls: Option<Vec<ToolCallResponse>>,
+    pub tool_calls: Option<Vec<ChatCompletionMessageToolCallChunk>>,
+    /// The refusal message generated by the model
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub refusal: Option<String>,
 }
 
 /// Usage statistics for the completion request
@@ -468,6 +592,37 @@ pub trait InferenceGatewayAPI {
     /// A list of available MCP tools. Only accessible when EXPOSE_MCP is enabled.
     fn list_tools(&self) -> impl Future<Output = Result<ListToolsResponse, GatewayError>> + Send;
 
+    /// Lists available A2A agents
+    ///
+    /// # Errors
+    /// - Returns [`GatewayError::Unauthorized`] if authentication fails
+    /// - Returns [`GatewayError::Forbidden`] if A2A is not exposed
+    /// - Returns [`GatewayError::InternalError`] if the server has an error
+    /// - Returns [`GatewayError::Other`] for other errors
+    ///
+    /// # Returns
+    /// A list of available A2A agents. Only accessible when EXPOSE_A2A is enabled.
+    fn list_agents(&self) -> impl Future<Output = Result<ListAgentsResponse, GatewayError>> + Send;
+
+    /// Gets a specific A2A agent by ID
+    ///
+    /// # Arguments
+    /// * `id` - The unique identifier of the agent
+    ///
+    /// # Errors
+    /// - Returns [`GatewayError::Unauthorized`] if authentication fails
+    /// - Returns [`GatewayError::Forbidden`] if A2A is not exposed
+    /// - Returns [`GatewayError::NotFound`] if the agent is not found
+    /// - Returns [`GatewayError::InternalError`] if the server has an error
+    /// - Returns [`GatewayError::Other`] for other errors
+    ///
+    /// # Returns
+    /// The A2A agent card. Only accessible when EXPOSE_A2A is enabled.
+    fn get_agent(
+        &self,
+        id: &str,
+    ) -> impl Future<Output = Result<A2AAgentCard, GatewayError>> + Send;
+
     /// Checks if the API is available
     fn health_check(&self) -> impl Future<Output = Result<bool, GatewayError>> + Send;
 }
@@ -741,6 +896,72 @@ impl InferenceGatewayAPI for InferenceGatewayClient {
         }
     }
 
+    async fn list_agents(&self) -> Result<ListAgentsResponse, GatewayError> {
+        let url = format!("{}/a2a/agents", self.base_url);
+        let mut request = self.client.get(&url);
+        if let Some(token) = &self.token {
+            request = request.bearer_auth(token);
+        }
+
+        let response = request.send().await?;
+        match response.status() {
+            StatusCode::OK => {
+                let json_response: ListAgentsResponse = response.json().await?;
+                Ok(json_response)
+            }
+            StatusCode::UNAUTHORIZED => {
+                let error: ErrorResponse = response.json().await?;
+                Err(GatewayError::Unauthorized(error.error))
+            }
+            StatusCode::FORBIDDEN => {
+                let error: ErrorResponse = response.json().await?;
+                Err(GatewayError::Forbidden(error.error))
+            }
+            StatusCode::INTERNAL_SERVER_ERROR => {
+                let error: ErrorResponse = response.json().await?;
+                Err(GatewayError::InternalError(error.error))
+            }
+            _ => Err(GatewayError::Other(Box::new(std::io::Error::other(
+                format!("Unexpected status code: {}", response.status()),
+            )))),
+        }
+    }
+
+    async fn get_agent(&self, id: &str) -> Result<A2AAgentCard, GatewayError> {
+        let url = format!("{}/a2a/agents/{}", self.base_url, id);
+        let mut request = self.client.get(&url);
+        if let Some(token) = &self.token {
+            request = request.bearer_auth(token);
+        }
+
+        let response = request.send().await?;
+        match response.status() {
+            StatusCode::OK => {
+                let json_response: A2AAgentCard = response.json().await?;
+                Ok(json_response)
+            }
+            StatusCode::UNAUTHORIZED => {
+                let error: ErrorResponse = response.json().await?;
+                Err(GatewayError::Unauthorized(error.error))
+            }
+            StatusCode::FORBIDDEN => {
+                let error: ErrorResponse = response.json().await?;
+                Err(GatewayError::Forbidden(error.error))
+            }
+            StatusCode::NOT_FOUND => {
+                let error: ErrorResponse = response.json().await?;
+                Err(GatewayError::NotFound(error.error))
+            }
+            StatusCode::INTERNAL_SERVER_ERROR => {
+                let error: ErrorResponse = response.json().await?;
+                Err(GatewayError::InternalError(error.error))
+            }
+            _ => Err(GatewayError::Other(Box::new(std::io::Error::other(
+                format!("Unexpected status code: {}", response.status()),
+            )))),
+        }
+    }
+
     async fn health_check(&self) -> Result<bool, GatewayError> {
         let url = format!("{}/health", self.base_url);
 
@@ -756,8 +977,9 @@ mod tests {
     use crate::{
         CreateChatCompletionRequest, CreateChatCompletionResponse,
-        CreateChatCompletionStreamResponse, FunctionObject, GatewayError, InferenceGatewayAPI,
-        InferenceGatewayClient, Message, MessageRole, Provider, Tool, ToolType,
+        CreateChatCompletionStreamResponse, FinishReason, FunctionObject, GatewayError,
+        InferenceGatewayAPI, InferenceGatewayClient, Message, MessageRole, Provider, Tool,
+        ToolType,
     };
     use futures_util::{pin_mut, StreamExt};
     use mockito::{Matcher, Server};
@@ -1374,7 +1596,7 @@ mod tests {
         if generate_response.choices[0].finish_reason.is_some() {
             assert_eq!(
                 generate_response.choices[0].finish_reason.as_ref().unwrap(),
-                "stop"
+                &FinishReason::Stop
             );
             break;
         }
@@ -1926,4 +2148,134 @@ mod tests {
         mock.assert();
         Ok(())
     }
+
+    #[tokio::test]
+    async fn test_list_agents() -> Result<(), GatewayError> {
+        let mut server = Server::new_async().await;
+
+        let raw_response_json = r#"{
+            "object": "list",
+            "data": [
+                {
+                    "id": "agent-123",
+                    "name": "Test Agent",
+                    "description": "A test A2A agent",
+                    "url": "http://test-agent:8080",
+                    "version": "1.0.0",
+
"defaultInputModes": ["text/plain"], + "defaultOutputModes": ["text/plain"], + "skills": [] + } + ] + }"#; + + let mock = server + .mock("GET", "/v1/a2a/agents") + .with_status(200) + .with_header("content-type", "application/json") + .with_body(raw_response_json) + .create(); + + let base_url = format!("{}/v1", server.url()); + let client = InferenceGatewayClient::new(&base_url); + let response = client.list_agents().await?; + + assert_eq!(response.object, "list"); + assert_eq!(response.data.len(), 1); + assert_eq!(response.data[0].id, "agent-123"); + assert_eq!(response.data[0].name, "Test Agent"); + assert_eq!(response.data[0].url, "http://test-agent:8080"); + mock.assert(); + + Ok(()) + } + + #[tokio::test] + async fn test_list_agents_a2a_not_exposed() -> Result<(), GatewayError> { + let mut server = Server::new_async().await; + + let mock = server + .mock("GET", "/v1/a2a/agents") + .with_status(403) + .with_header("content-type", "application/json") + .with_body( + r#"{"error":"A2A agents endpoint is not exposed. Set EXPOSE_A2A=true to enable."}"#, + ) + .create(); + + let base_url = format!("{}/v1", server.url()); + let client = InferenceGatewayClient::new(&base_url); + + match client.list_agents().await { + Err(GatewayError::Forbidden(msg)) => { + assert_eq!( + msg, + "A2A agents endpoint is not exposed. Set EXPOSE_A2A=true to enable." + ); + } + _ => panic!("Expected Forbidden error for A2A not exposed"), + } + + mock.assert(); + Ok(()) + } + + #[tokio::test] + async fn test_get_agent() -> Result<(), GatewayError> { + let mut server = Server::new_async().await; + + let raw_response_json = r#"{ + "id": "agent-123", + "name": "Test Agent", + "description": "A test A2A agent", + "url": "http://test-agent:8080", + "version": "1.0.0", + "defaultInputModes": ["text/plain"], + "defaultOutputModes": ["text/plain"], + "skills": [] + }"#; + + let mock = server + .mock("GET", "/v1/a2a/agents/agent-123") + .with_status(200) + .with_header("content-type", "application/json") + .with_body(raw_response_json) + .create(); + + let base_url = format!("{}/v1", server.url()); + let client = InferenceGatewayClient::new(&base_url); + let response = client.get_agent("agent-123").await?; + + assert_eq!(response.id, "agent-123"); + assert_eq!(response.name, "Test Agent"); + assert_eq!(response.url, "http://test-agent:8080"); + mock.assert(); + + Ok(()) + } + + #[tokio::test] + async fn test_get_agent_not_found() -> Result<(), GatewayError> { + let mut server = Server::new_async().await; + + let mock = server + .mock("GET", "/v1/a2a/agents/non-existent") + .with_status(404) + .with_header("content-type", "application/json") + .with_body(r#"{"error":"Agent not found"}"#) + .create(); + + let base_url = format!("{}/v1", server.url()); + let client = InferenceGatewayClient::new(&base_url); + + match client.get_agent("non-existent").await { + Err(GatewayError::NotFound(msg)) => { + assert_eq!(msg, "Agent not found"); + } + _ => panic!("Expected NotFound error"), + } + + mock.assert(); + Ok(()) + } } From 99cd978606f5e2486cc56ca4769b35144287668e Mon Sep 17 00:00:00 2001 From: Eden Reich Date: Sat, 22 Nov 2025 01:02:23 +0200 Subject: [PATCH 2/5] chore: Download the latest schema --- Taskfile.yml | 2 +- openapi.yaml | 405 +++++++++++++++++++++------------------------------ 2 files changed, 167 insertions(+), 240 deletions(-) diff --git a/Taskfile.yml b/Taskfile.yml index 25f5b4a..ed323e6 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -4,7 +4,7 @@ tasks: oas-download: desc: Download OpenAPI specification cmds: - - 
curl -o openapi.yaml https://raw.githubusercontent.com/inference-gateway/inference-gateway/refs/heads/main/openapi.yaml + - curl -o openapi.yaml https://raw.githubusercontent.com/inference-gateway/schemas/refs/heads/main/openapi.yaml lint: desc: Run linter diff --git a/openapi.yaml b/openapi.yaml index 317a74c..0efb48b 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -31,8 +31,6 @@ tags: description: Generate completions from the models. - name: MCP description: List and manage MCP tools. - - name: A2A - description: List and manage A2A agents. - name: Proxy description: Proxy requests to provider endpoints. - name: Health @@ -96,6 +94,16 @@ paths: created: 1718441600 owned_by: 'ollama' served_by: 'ollama' + - id: 'ollama_cloud/gpt-oss:20b' + object: 'model' + created: 1730419200 + owned_by: 'ollama_cloud' + served_by: 'ollama_cloud' + - id: 'mistral/mistral-large-latest' + object: 'model' + created: 1698019200 + owned_by: 'mistral' + served_by: 'mistral' singleProvider: summary: Models from a specific provider value: @@ -179,65 +187,6 @@ paths: $ref: '#/components/responses/MCPNotExposed' '500': $ref: '#/components/responses/InternalError' - /a2a/agents: - get: - operationId: listAgents - tags: - - A2A - description: | - Lists the currently available A2A agents. Only accessible when EXPOSE_A2A is enabled. - summary: Lists the currently available A2A agents - security: - - bearerAuth: [] - responses: - '200': - description: Successful response - content: - application/json: - schema: - $ref: '#/components/schemas/ListAgentsResponse' - '401': - $ref: '#/components/responses/Unauthorized' - '403': - $ref: '#/components/responses/A2ANotExposed' - '500': - $ref: '#/components/responses/InternalError' - /a2a/agents/{id}: - get: - operationId: getAgent - tags: - - A2A - description: | - Gets a specific A2A agent by its unique identifier. Only accessible when EXPOSE_A2A is enabled. - summary: Gets a specific A2A agent by ID - security: - - bearerAuth: [] - parameters: - - name: id - in: path - required: true - schema: - type: string - description: The unique identifier of the agent - responses: - '200': - description: Successful response - content: - application/json: - schema: - $ref: '#/components/schemas/A2AAgentCard' - '401': - $ref: '#/components/responses/Unauthorized' - '403': - $ref: '#/components/responses/A2ANotExposed' - '404': - description: Agent not found - content: - application/json: - schema: - $ref: '#/components/schemas/Error' - '500': - $ref: '#/components/responses/InternalError' /proxy/{provider}/{path}: parameters: - name: provider @@ -415,6 +364,14 @@ components: - role: 'user' content: 'Explain quantum computing' temperature: 0.5 + mistral: + summary: Mistral AI request + value: + model: 'mistral-large-latest' + messages: + - role: 'user' + content: 'Write a Python function to calculate fibonacci numbers' + temperature: 0.3 CreateChatCompletionRequest: required: true description: | @@ -451,14 +408,6 @@ components: $ref: '#/components/schemas/Error' example: error: 'MCP tools endpoint is not exposed. Set EXPOSE_MCP=true to enable.' - A2ANotExposed: - description: A2A agents endpoint is not exposed - content: - application/json: - schema: - $ref: '#/components/schemas/Error' - example: - error: 'A2A agents endpoint is not exposed. Set EXPOSE_A2A=true to enable.' 
ProviderResponse: description: | ProviderResponse depends on the specific provider and endpoint being called @@ -489,6 +438,27 @@ components: }, ], } + mistral: + summary: Mistral AI response + value: + { + 'id': 'cmpl-123', + 'object': 'chat.completion', + 'created': 1677652288, + 'model': 'mistral-large-latest', + 'choices': + [ + { + 'index': 0, + 'message': + { + 'role': 'assistant', + 'content': 'def fibonacci(n):\n if n <= 1:\n return n\n return fibonacci(n-1) + fibonacci(n-2)', + }, + 'finish_reason': 'stop', + }, + ], + } securitySchemes: bearerAuth: type: http @@ -503,6 +473,7 @@ components: type: string enum: - ollama + - ollama_cloud - groq - openai - cloudflare @@ -510,11 +481,27 @@ components: - anthropic - deepseek - google + - mistral x-provider-configs: ollama: id: 'ollama' url: 'http://ollama:8080/v1' auth_type: 'none' + supports_vision: true + endpoints: + models: + name: 'list_models' + method: 'GET' + endpoint: '/models' + chat: + name: 'chat_completions' + method: 'POST' + endpoint: '/chat/completions' + ollama_cloud: + id: 'ollama_cloud' + url: 'https://ollama.com/v1' + auth_type: 'bearer' + supports_vision: true endpoints: models: name: 'list_models' @@ -528,6 +515,7 @@ components: id: 'anthropic' url: 'https://api.anthropic.com/v1' auth_type: 'xheader' + supports_vision: true endpoints: models: name: 'list_models' @@ -541,6 +529,7 @@ components: id: 'cohere' url: 'https://api.cohere.ai' auth_type: 'bearer' + supports_vision: true endpoints: models: name: 'list_models' @@ -554,6 +543,7 @@ components: id: 'groq' url: 'https://api.groq.com/openai/v1' auth_type: 'bearer' + supports_vision: true endpoints: models: name: 'list_models' @@ -567,6 +557,7 @@ components: id: 'openai' url: 'https://api.openai.com/v1' auth_type: 'bearer' + supports_vision: true endpoints: models: name: 'list_models' @@ -580,6 +571,7 @@ components: id: 'cloudflare' url: 'https://api.cloudflare.com/client/v4/accounts/{ACCOUNT_ID}/ai' auth_type: 'bearer' + supports_vision: false endpoints: models: name: 'list_models' @@ -593,6 +585,7 @@ components: id: 'deepseek' url: 'https://api.deepseek.com' auth_type: 'bearer' + supports_vision: false endpoints: models: name: 'list_models' @@ -606,6 +599,21 @@ components: id: 'google' url: 'https://generativelanguage.googleapis.com/v1beta/openai' auth_type: 'bearer' + supports_vision: true + endpoints: + models: + name: 'list_models' + method: 'GET' + endpoint: '/models' + chat: + name: 'chat_completions' + method: 'POST' + endpoint: '/chat/completions' + mistral: + id: 'mistral' + url: 'https://api.mistral.ai/v1' + auth_type: 'bearer' + supports_vision: true endpoints: models: name: 'list_models' @@ -709,7 +717,13 @@ components: role: $ref: '#/components/schemas/MessageRole' content: - type: string + oneOf: + - type: string + description: Text content (backward compatibility) + - type: array + items: + $ref: '#/components/schemas/ContentPart' + description: Array of content parts for multimodal messages tool_calls: type: array items: @@ -725,6 +739,53 @@ components: required: - role - content + ContentPart: + type: object + description: A content part within a multimodal message + oneOf: + - $ref: '#/components/schemas/TextContentPart' + - $ref: '#/components/schemas/ImageContentPart' + TextContentPart: + type: object + description: Text content part + properties: + type: + type: string + enum: [text] + description: Content type identifier + text: + type: string + description: The text content + required: + - type + - text + ImageContentPart: + type: object + 
description: Image content part + properties: + type: + type: string + enum: [image_url] + description: Content type identifier + image_url: + $ref: '#/components/schemas/ImageURL' + required: + - type + - image_url + ImageURL: + type: object + description: Image URL configuration + properties: + url: + type: string + description: URL of the image (data URLs supported) + detail: + type: string + enum: [auto, low, high] + default: auto + description: Image detail level for vision processing + required: + - url Model: type: object description: Common model information @@ -779,103 +840,6 @@ components: required: - object - data - ListAgentsResponse: - type: object - description: Response structure for listing A2A agents - properties: - object: - type: string - description: Always "list" - example: 'list' - data: - type: array - items: - $ref: '#/components/schemas/A2AAgentCard' - default: [] - description: Array of available A2A agents - required: - - object - - data - A2AAgentCard: - description: |- - An AgentCard conveys key information: - - Overall details (version, name, description, uses) - - Skills: A set of capabilities the agent can perform - - Default modalities/content types supported by the agent. - - Authentication requirements - properties: - capabilities: - additionalProperties: true - description: Optional capabilities supported by the agent. - defaultInputModes: - description: |- - The set of interaction modes that the agent supports across all skills. This can be overridden per-skill. - Supported media types for input. - items: - type: string - type: array - defaultOutputModes: - description: Supported media types for output. - items: - type: string - type: array - description: - description: |- - A human-readable description of the agent. Used to assist users and - other agents in understanding what the agent can do. - type: string - documentationUrl: - description: A URL to documentation for the agent. - type: string - iconUrl: - description: A URL to an icon for the agent. - type: string - id: - description: Unique identifier for the agent (base64-encoded SHA256 hash of the agent URL). - type: string - name: - description: Human readable name of the agent. - type: string - provider: - additionalProperties: true - description: The service provider of the agent - security: - description: Security requirements for contacting the agent. - items: - additionalProperties: true - type: object - type: array - securitySchemes: - additionalProperties: true - description: Security scheme details used for authenticating with this agent. - type: object - skills: - description: Skills are a unit of capability that an agent can perform. - items: - additionalProperties: true - type: array - supportsAuthenticatedExtendedCard: - description: |- - true if the agent supports providing an extended agent card when the user is authenticated. - Defaults to false if not specified. - type: boolean - url: - description: A URL to the address the agent is hosted at. - type: string - version: - description: The version of the agent - format is up to the provider. - type: string - required: - - capabilities - - defaultInputModes - - defaultOutputModes - - description - - id - - name - - skills - - url - - version - type: object MCPTool: type: object description: An MCP tool definition @@ -1352,6 +1316,21 @@ components: type: string default: '' description: 'Comma-separated list of models to allow. 
If empty, all models will be available' + - name: enable_vision + env: 'ENABLE_VISION' + type: bool + default: 'false' + description: 'Enable vision/multimodal support for all providers. When disabled, image inputs will be rejected even if the provider and model support vision' + - name: debug_content_truncate_words + env: 'DEBUG_CONTENT_TRUNCATE_WORDS' + type: int + default: '10' + description: 'Number of words to truncate per content section in debug logs (development mode only)' + - name: debug_max_messages + env: 'DEBUG_MAX_MESSAGES' + type: int + default: '100' + description: 'Maximum number of messages to show in debug logs (development mode only)' - telemetry: title: 'Telemetry' settings: @@ -1457,78 +1436,6 @@ components: type: bool default: 'true' description: 'Disable health check log messages to reduce noise' - - a2a: - title: 'Agent-to-Agent (A2A) Protocol' - settings: - - name: a2a_enable - env: 'A2A_ENABLE' - type: bool - default: 'false' - description: 'Enable A2A protocol support' - - name: a2a_expose - env: 'A2A_EXPOSE' - type: bool - default: 'false' - description: 'Expose A2A agents list cards endpoint' - - name: a2a_agents - env: 'A2A_AGENTS' - type: string - description: 'Comma-separated list of A2A agent URLs' - - name: a2a_client_timeout - env: 'A2A_CLIENT_TIMEOUT' - type: time.Duration - default: '30s' - description: 'A2A client timeout' - - name: a2a_polling_enable - env: 'A2A_POLLING_ENABLE' - type: bool - default: 'true' - description: 'Enable task status polling' - - name: a2a_polling_interval - env: 'A2A_POLLING_INTERVAL' - type: time.Duration - default: '1s' - description: 'Interval between polling requests' - - name: a2a_polling_timeout - env: 'A2A_POLLING_TIMEOUT' - type: time.Duration - default: '30s' - description: 'Maximum time to wait for task completion' - - name: a2a_max_poll_attempts - env: 'A2A_MAX_POLL_ATTEMPTS' - type: int - default: '30' - description: 'Maximum number of polling attempts' - - name: a2a_max_retries - env: 'A2A_MAX_RETRIES' - type: int - default: '3' - description: 'Maximum number of connection retry attempts' - - name: a2a_retry_interval - env: 'A2A_RETRY_INTERVAL' - type: time.Duration - default: '5s' - description: 'Interval between connection retry attempts' - - name: a2a_initial_backoff - env: 'A2A_INITIAL_BACKOFF' - type: time.Duration - default: '1s' - description: 'Initial backoff duration for exponential backoff retry' - - name: a2a_enable_reconnect - env: 'A2A_ENABLE_RECONNECT' - type: bool - default: 'true' - description: 'Enable automatic reconnection for failed agents' - - name: a2a_reconnect_interval - env: 'A2A_RECONNECT_INTERVAL' - type: time.Duration - default: '30s' - description: 'Interval between reconnection attempts' - - name: a2a_disable_healthcheck_logs - env: 'A2A_DISABLE_HEALTHCHECK_LOGS' - type: bool - default: 'true' - description: 'Disable health check log messages to reduce noise' - auth: title: 'Authentication' settings: @@ -1685,6 +1592,16 @@ components: type: string description: 'Ollama API Key' secret: true + - name: ollama_cloud_api_url + env: 'OLLAMA_CLOUD_API_URL' + type: string + default: 'https://ollama.com/v1' + description: 'Ollama Cloud API URL' + - name: ollama_cloud_api_key + env: 'OLLAMA_CLOUD_API_KEY' + type: string + description: 'Ollama Cloud API Key' + secret: true - name: openai_api_url env: 'OPENAI_API_URL' type: string @@ -1715,3 +1632,13 @@ components: type: string description: 'Google API Key' secret: true + - name: mistral_api_url + env: 'MISTRAL_API_URL' + type: string + 
default: 'https://api.mistral.ai/v1'
+        description: 'Mistral API URL'
+      - name: mistral_api_key
+        env: 'MISTRAL_API_KEY'
+        type: string
+        description: 'Mistral API Key'
+        secret: true

From c56690cef3b87545f2ffb07c7723a973fafbd739 Mon Sep 17 00:00:00 2001
From: "claude[bot]" <209825114+claude[bot]@users.noreply.github.com>
Date: Fri, 21 Nov 2025 23:07:35 +0000
Subject: [PATCH 3/5] feat(sdk): Sync with latest OpenAPI specification
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add OllamaCloud and Mistral providers to Provider enum
- Update Display and TryFrom implementations for new providers
- Add Serialize derive to ChatCompletionMessageToolCallChunk
- Add logprobs field to ChatCompletionChoice
- Add reasoning_format field to request/response types
- Update all tests to include new providers and logprobs field

Co-authored-by: Eden Reich

🤖 Generated with [Claude Code](https://claude.ai/code)
---
 src/lib.rs | 33 ++++++++++++++++++++++++++++++++-
 1 file changed, 32 insertions(+), 1 deletion(-)

diff --git a/src/lib.rs b/src/lib.rs
index 733d4ce..14be81a 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -172,6 +172,8 @@ pub enum Provider {
     #[serde(alias = "Ollama", alias = "OLLAMA")]
     Ollama,
+    #[serde(alias = "OllamaCloud", alias = "OLLAMA_CLOUD", rename = "ollama_cloud")]
+    OllamaCloud,
     #[serde(alias = "Groq", alias = "GROQ")]
     Groq,
     #[serde(alias = "OpenAI", alias = "OPENAI")]
@@ -186,12 +188,15 @@ pub enum Provider {
     Deepseek,
     #[serde(alias = "Google", alias = "GOOGLE")]
     Google,
+    #[serde(alias = "Mistral", alias = "MISTRAL")]
+    Mistral,
 }
 
 impl fmt::Display for Provider {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         match self {
             Provider::Ollama => write!(f, "ollama"),
+            Provider::OllamaCloud => write!(f, "ollama_cloud"),
             Provider::Groq => write!(f, "groq"),
             Provider::OpenAI => write!(f, "openai"),
             Provider::Cloudflare => write!(f, "cloudflare"),
@@ -199,6 +204,7 @@ impl fmt::Display for Provider {
             Provider::Anthropic => write!(f, "anthropic"),
             Provider::Deepseek => write!(f, "deepseek"),
             Provider::Google => write!(f, "google"),
+            Provider::Mistral => write!(f, "mistral"),
         }
     }
 }
@@ -209,6 +215,7 @@ impl TryFrom<&str> for Provider {
     fn try_from(s: &str) -> Result<Self, Self::Error> {
         match s.to_lowercase().as_str() {
             "ollama" => Ok(Self::Ollama),
+            "ollama_cloud" => Ok(Self::OllamaCloud),
             "groq" => Ok(Self::Groq),
             "openai" => Ok(Self::OpenAI),
             "cloudflare" => Ok(Self::Cloudflare),
@@ -216,6 +223,7 @@ impl TryFrom<&str> for Provider {
             "anthropic" => Ok(Self::Anthropic),
             "deepseek" => Ok(Self::Deepseek),
             "google" => Ok(Self::Google),
+            "mistral" => Ok(Self::Mistral),
             _ => Err(GatewayError::BadRequest(format!("Unknown provider: {s}"))),
         }
     }
@@ -336,10 +344,13 @@ struct CreateChatCompletionRequest {
     /// Maximum number of tokens to generate
     #[serde(skip_serializing_if = "Option::is_none")]
     max_tokens: Option<i32>,
+    /// The format of the reasoning content. Can be `raw` or `parsed`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    reasoning_format: Option<String>,
 }
 
 /// A tool call chunk in streaming responses
-#[derive(Debug, Deserialize, Clone)]
+#[derive(Debug, Serialize, Deserialize, Clone)]
 pub struct ChatCompletionMessageToolCallChunk {
     /// Index of the tool call in the array
     pub index: i32,
@@ -375,6 +386,8 @@ pub struct ChatCompletionChoice {
     pub finish_reason: FinishReason,
     pub message: Message,
     pub index: i32,
+    /// Log probability information for the choice
+    pub logprobs: Option<ChoiceLogprobs>,
 }
 
 /// The response from generating content
@@ -406,6 +419,9 @@ pub struct CreateChatCompletionStreamResponse {
     /// Usage statistics for the completion request.
     #[serde(skip_serializing_if = "Option::is_none")]
     pub usage: Option<CompletionUsage>,
+    /// The format of the reasoning content. Can be `raw` or `parsed`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub reasoning_format: Option<String>,
 }
 
@@ -783,6 +799,7 @@ impl InferenceGatewayAPI for InferenceGatewayClient {
             stream: false,
             tools: self.tools.clone(),
             max_tokens: self.max_tokens,
+            reasoning_format: None,
         };
 
         let response = request.json(&request_payload).send().await?;
@@ -828,6 +845,7 @@ impl InferenceGatewayAPI for InferenceGatewayClient {
             stream: true,
             tools: None,
             max_tokens: None,
+            reasoning_format: None,
         };
 
         async_stream::try_stream! {
@@ -989,6 +1007,7 @@ mod tests {
     fn test_provider_serialization() {
         let providers = vec![
             (Provider::Ollama, "ollama"),
+            (Provider::OllamaCloud, "ollama_cloud"),
             (Provider::Groq, "groq"),
             (Provider::OpenAI, "openai"),
             (Provider::Cloudflare, "cloudflare"),
@@ -996,6 +1015,7 @@ mod tests {
             (Provider::Anthropic, "anthropic"),
             (Provider::Deepseek, "deepseek"),
             (Provider::Google, "google"),
+            (Provider::Mistral, "mistral"),
         ];
 
         for (provider, expected) in providers {
@@ -1008,6 +1028,7 @@ mod tests {
     fn test_provider_deserialization() {
         let test_cases = vec![
             ("\"ollama\"", Provider::Ollama),
+            ("\"ollama_cloud\"", Provider::OllamaCloud),
             ("\"groq\"", Provider::Groq),
             ("\"openai\"", Provider::OpenAI),
             ("\"cloudflare\"", Provider::Cloudflare),
@@ -1015,6 +1036,7 @@ mod tests {
             ("\"anthropic\"", Provider::Anthropic),
             ("\"deepseek\"", Provider::Deepseek),
             ("\"google\"", Provider::Google),
+            ("\"mistral\"", Provider::Mistral),
         ];
 
         for (json, expected) in test_cases {
@@ -1062,6 +1084,7 @@ mod tests {
     fn test_provider_display() {
         let providers = vec![
             (Provider::Ollama, "ollama"),
+            (Provider::OllamaCloud, "ollama_cloud"),
             (Provider::Groq, "groq"),
             (Provider::OpenAI, "openai"),
             (Provider::Cloudflare, "cloudflare"),
@@ -1069,6 +1092,7 @@ mod tests {
             (Provider::Anthropic, "anthropic"),
             (Provider::Deepseek, "deepseek"),
             (Provider::Google, "google"),
+            (Provider::Mistral, "mistral"),
         ];
 
         for (provider, expected) in providers {
@@ -1329,6 +1353,7 @@ mod tests {
                 {
                     "index": 0,
                     "finish_reason": "stop",
+                    "logprobs": null,
                     "message": {
                         "role": "assistant",
                         "content": "Hellloooo"
@@ -1376,6 +1401,7 @@ mod tests {
                 {
                     "index": 0,
                     "finish_reason": "stop",
+                    "logprobs": null,
                     "message": {
                         "role": "assistant",
                         "content": "Hello"
@@ -1517,6 +1543,7 @@ mod tests {
                 {
                     "index": 0,
                     "finish_reason": "stop",
+                    "logprobs": null,
                     "message": {
                         "role": "assistant",
                         "content": "Hello"
@@ -1671,6 +1698,7 @@ mod tests {
                 {
                     "index": 0,
                     "finish_reason": "tool_calls",
+                    "logprobs": null,
                     "message": {
                         "role": "assistant",
                         "content": "Let me check the weather for you.",
@@ -1760,6 +1788,7 @@ mod tests {
                 {
                     "index": 0,
                     "finish_reason": "stop",
+                    "logprobs": null,
                     "message": {
                         "role": "assistant",
                         "content": "Hello!"
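For reference, the serde attributes this patch adds to Provider fix the wire format of the new variants while still accepting legacy spellings on input. A round-trip sketch (the crate name inference_gateway_sdk and a serde_json dev-dependency are assumptions, not confirmed by the series):

use inference_gateway_sdk::Provider; // assumed crate name and root re-export

fn main() -> Result<(), serde_json::Error> {
    // The alias attributes accept legacy spellings such as "OLLAMA_CLOUD" on input.
    let p: Provider = serde_json::from_str("\"OLLAMA_CLOUD\"")?;
    assert_eq!(p, Provider::OllamaCloud);
    // rename = "ollama_cloud" keeps the serialized form snake_case, overriding
    // the enum-level rename_all = "lowercase".
    assert_eq!(serde_json::to_string(&p)?, "\"ollama_cloud\"");
    Ok(())
}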
@@ -1844,6 +1873,7 @@ mod tests {
                 {
                     "index": 0,
                     "finish_reason": "stop",
+                    "logprobs": null,
                     "message": {
                         "role": "assistant",
                         "content": "Let me check the weather for you",
@@ -1941,6 +1971,7 @@ mod tests {
                 {
                     "index": 0,
                     "finish_reason": "stop",
+                    "logprobs": null,
                     "message": {
                         "role": "assistant",
                         "content": "Here's a poem with 100 tokens..."

From a11c355084481462d4b1e28ff5d68af76fd51a74 Mon Sep 17 00:00:00 2001
From: "claude[bot]" <209825114+claude[bot]@users.noreply.github.com>
Date: Fri, 21 Nov 2025 23:11:17 +0000
Subject: [PATCH 4/5] style: Fix rustfmt formatting for serde attribute

Co-authored-by: Eden Reich
---
 src/lib.rs | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/lib.rs b/src/lib.rs
index 14be81a..ecfddbd 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -153,7 +153,10 @@ pub struct A2AAgentCard {
     #[serde(rename = "securitySchemes", skip_serializing_if = "Option::is_none")]
     pub security_schemes: Option<serde_json::Value>,
     /// True if the agent supports providing an extended agent card when the user is authenticated
-    #[serde(rename = "supportsAuthenticatedExtendedCard", skip_serializing_if = "Option::is_none")]
+    #[serde(
+        rename = "supportsAuthenticatedExtendedCard",
+        skip_serializing_if = "Option::is_none"
+    )]
     pub supports_authenticated_extended_card: Option<bool>,
 }
 

From ce8830c5bb58359bff22dd991030866c633b0e12 Mon Sep 17 00:00:00 2001
From: "claude[bot]" <209825114+claude[bot]@users.noreply.github.com>
Date: Fri, 21 Nov 2025 23:18:09 +0000
Subject: [PATCH 5/5] fix: Remove A2A code and fix missing reasoning_format field
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Remove A2AAgentCard and ListAgentsResponse structs
- Remove list_agents() and get_agent() trait methods
- Remove A2A implementation methods from InferenceGatewayClient
- Remove A2A-related tests
- Add missing reasoning_format field in test_generate_request_serialization

Co-authored-by: Eden Reich

🤖 Generated with [Claude Code](https://claude.ai/code)
---
 src/lib.rs | 284 +----------------------------------------------------
 1 file changed, 1 insertion(+), 283 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index ecfddbd..9f0dde3 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -113,62 +113,6 @@ pub struct ListToolsResponse {
     pub data: Vec<MCPTool>,
 }
 
-/// An A2A agent card definition
-#[derive(Debug, Serialize, Deserialize, Clone)]
-pub struct A2AAgentCard {
-    /// Unique identifier for the agent (base64-encoded SHA256 hash of the agent URL)
-    pub id: String,
-    /// Human readable name of the agent
-    pub name: String,
-    /// A human-readable description of the agent
-    pub description: String,
-    /// A URL to the address the agent is hosted at
-    pub url: String,
-    /// The version of the agent
-    pub version: String,
-    /// Optional capabilities supported by the agent
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub capabilities: Option<serde_json::Value>,
-    /// The set of interaction modes that the agent supports across all skills
-    #[serde(rename = "defaultInputModes")]
-    pub default_input_modes: Vec<String>,
-    /// Supported media types for output
-    #[serde(rename = "defaultOutputModes")]
-    pub default_output_modes: Vec<String>,
-    /// Skills are a unit of capability that an agent can perform
-    pub skills: Vec<serde_json::Value>,
-    /// A URL to documentation for the agent
-    #[serde(rename = "documentationUrl", skip_serializing_if = "Option::is_none")]
-    pub documentation_url: Option<String>,
-    /// A URL to an icon for the agent
-    #[serde(rename = "iconUrl", skip_serializing_if = "Option::is_none")]
-    pub icon_url: Option<String>,
-    /// The service provider of the agent
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub provider: Option<serde_json::Value>,
-    /// Security requirements for contacting the agent
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub security: Option<Vec<serde_json::Value>>,
-    /// Security scheme details used for authenticating with this agent
-    #[serde(rename = "securitySchemes", skip_serializing_if = "Option::is_none")]
-    pub security_schemes: Option<serde_json::Value>,
-    /// True if the agent supports providing an extended agent card when the user is authenticated
-    #[serde(
-        rename = "supportsAuthenticatedExtendedCard",
-        skip_serializing_if = "Option::is_none"
-    )]
-    pub supports_authenticated_extended_card: Option<bool>,
-}
-
-/// Response structure for listing A2A agents
-#[derive(Debug, Serialize, Deserialize)]
-pub struct ListAgentsResponse {
-    /// Response object type, always "list"
-    pub object: String,
-    /// Array of available A2A agents
-    pub data: Vec<A2AAgentCard>,
-}
-
 /// Supported LLM providers
 #[derive(Debug, Serialize, Deserialize, PartialEq, Clone, Copy)]
 #[serde(rename_all = "lowercase")]
@@ -611,37 +555,6 @@ pub trait InferenceGatewayAPI {
     /// A list of available MCP tools. Only accessible when EXPOSE_MCP is enabled.
     fn list_tools(&self) -> impl Future<Output = Result<ListToolsResponse, GatewayError>> + Send;
 
-    /// Lists available A2A agents
-    ///
-    /// # Errors
-    /// - Returns [`GatewayError::Unauthorized`] if authentication fails
-    /// - Returns [`GatewayError::Forbidden`] if A2A is not exposed
-    /// - Returns [`GatewayError::InternalError`] if the server has an error
-    /// - Returns [`GatewayError::Other`] for other errors
-    ///
-    /// # Returns
-    /// A list of available A2A agents. Only accessible when EXPOSE_A2A is enabled.
-    fn list_agents(&self) -> impl Future<Output = Result<ListAgentsResponse, GatewayError>> + Send;
-
-    /// Gets a specific A2A agent by ID
-    ///
-    /// # Arguments
-    /// * `id` - The unique identifier of the agent
-    ///
-    /// # Errors
-    /// - Returns [`GatewayError::Unauthorized`] if authentication fails
-    /// - Returns [`GatewayError::Forbidden`] if A2A is not exposed
-    /// - Returns [`GatewayError::NotFound`] if the agent is not found
-    /// - Returns [`GatewayError::InternalError`] if the server has an error
-    /// - Returns [`GatewayError::Other`] for other errors
-    ///
-    /// # Returns
-    /// The A2A agent card. Only accessible when EXPOSE_A2A is enabled.
-    fn get_agent(
-        &self,
-        id: &str,
-    ) -> impl Future<Output = Result<A2AAgentCard, GatewayError>> + Send;
-
     /// Checks if the API is available
     fn health_check(&self) -> impl Future<Output = Result<bool, GatewayError>> + Send;
 }
@@ -917,72 +830,6 @@ impl InferenceGatewayAPI for InferenceGatewayClient {
         }
     }
 
-    async fn list_agents(&self) -> Result<ListAgentsResponse, GatewayError> {
-        let url = format!("{}/a2a/agents", self.base_url);
-        let mut request = self.client.get(&url);
-        if let Some(token) = &self.token {
-            request = request.bearer_auth(token);
-        }
-
-        let response = request.send().await?;
-        match response.status() {
-            StatusCode::OK => {
-                let json_response: ListAgentsResponse = response.json().await?;
-                Ok(json_response)
-            }
-            StatusCode::UNAUTHORIZED => {
-                let error: ErrorResponse = response.json().await?;
-                Err(GatewayError::Unauthorized(error.error))
-            }
-            StatusCode::FORBIDDEN => {
-                let error: ErrorResponse = response.json().await?;
-                Err(GatewayError::Forbidden(error.error))
-            }
-            StatusCode::INTERNAL_SERVER_ERROR => {
-                let error: ErrorResponse = response.json().await?;
-                Err(GatewayError::InternalError(error.error))
-            }
-            _ => Err(GatewayError::Other(Box::new(std::io::Error::other(
-                format!("Unexpected status code: {}", response.status()),
-            )))),
-        }
-    }
-
-    async fn get_agent(&self, id: &str) -> Result<A2AAgentCard, GatewayError> {
-        let url = format!("{}/a2a/agents/{}", self.base_url, id);
-        let mut request = self.client.get(&url);
-        if let Some(token) = &self.token {
-            request = request.bearer_auth(token);
-        }
-
-        let response = request.send().await?;
-        match response.status() {
-            StatusCode::OK => {
-                let json_response: A2AAgentCard = response.json().await?;
-                Ok(json_response)
-            }
-            StatusCode::UNAUTHORIZED => {
-                let error: ErrorResponse = response.json().await?;
-                Err(GatewayError::Unauthorized(error.error))
-            }
-            StatusCode::FORBIDDEN => {
-                let error: ErrorResponse = response.json().await?;
-                Err(GatewayError::Forbidden(error.error))
-            }
-            StatusCode::NOT_FOUND => {
-                let error: ErrorResponse = response.json().await?;
-                Err(GatewayError::NotFound(error.error))
-            }
-            StatusCode::INTERNAL_SERVER_ERROR => {
-                let error: ErrorResponse = response.json().await?;
-                Err(GatewayError::InternalError(error.error))
-            }
-            _ => Err(GatewayError::Other(Box::new(std::io::Error::other(
-                format!("Unexpected status code: {}", response.status()),
-            )))),
-        }
-    }
-
     async fn health_check(&self) -> Result<bool, GatewayError> {
         let url = format!("{}/health", self.base_url);
 
@@ -1158,6 +1005,7 @@ mod tests {
                 },
             }]),
             max_tokens: None,
+            reasoning_format: None,
         };
 
         let serialized = serde_json::to_string_pretty(&request_payload).unwrap();
@@ -2182,134 +2030,4 @@ mod tests {
         mock.assert();
         Ok(())
     }
-
-    #[tokio::test]
-    async fn test_list_agents() -> Result<(), GatewayError> {
-        let mut server = Server::new_async().await;
-
-        let raw_response_json = r#"{
-            "object": "list",
-            "data": [
-                {
-                    "id": "agent-123",
-                    "name": "Test Agent",
-                    "description": "A test A2A agent",
-                    "url": "http://test-agent:8080",
-                    "version": "1.0.0",
-                    "defaultInputModes": ["text/plain"],
-                    "defaultOutputModes": ["text/plain"],
-                    "skills": []
-                }
-            ]
-        }"#;
-
-        let mock = server
-            .mock("GET", "/v1/a2a/agents")
-            .with_status(200)
-            .with_header("content-type", "application/json")
-            .with_body(raw_response_json)
-            .create();
-
-        let base_url = format!("{}/v1", server.url());
-        let client = InferenceGatewayClient::new(&base_url);
-        let response = client.list_agents().await?;
-
-        assert_eq!(response.object, "list");
-        assert_eq!(response.data.len(), 1);
-        assert_eq!(response.data[0].id, "agent-123");
-        assert_eq!(response.data[0].name, "Test Agent");
-        assert_eq!(response.data[0].url, "http://test-agent:8080");
-        mock.assert();
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn test_list_agents_a2a_not_exposed() -> Result<(), GatewayError> {
-        let mut server = Server::new_async().await;
-
-        let mock = server
-            .mock("GET", "/v1/a2a/agents")
-            .with_status(403)
-            .with_header("content-type", "application/json")
-            .with_body(
-                r#"{"error":"A2A agents endpoint is not exposed. Set EXPOSE_A2A=true to enable."}"#,
-            )
-            .create();
-
-        let base_url = format!("{}/v1", server.url());
-        let client = InferenceGatewayClient::new(&base_url);
-
-        match client.list_agents().await {
-            Err(GatewayError::Forbidden(msg)) => {
-                assert_eq!(
-                    msg,
-                    "A2A agents endpoint is not exposed. Set EXPOSE_A2A=true to enable."
-                );
-            }
-            _ => panic!("Expected Forbidden error for A2A not exposed"),
-        }
-
-        mock.assert();
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn test_get_agent() -> Result<(), GatewayError> {
-        let mut server = Server::new_async().await;
-
-        let raw_response_json = r#"{
-            "id": "agent-123",
-            "name": "Test Agent",
-            "description": "A test A2A agent",
-            "url": "http://test-agent:8080",
-            "version": "1.0.0",
-            "defaultInputModes": ["text/plain"],
-            "defaultOutputModes": ["text/plain"],
-            "skills": []
-        }"#;
-
-        let mock = server
-            .mock("GET", "/v1/a2a/agents/agent-123")
-            .with_status(200)
-            .with_header("content-type", "application/json")
-            .with_body(raw_response_json)
-            .create();
-
-        let base_url = format!("{}/v1", server.url());
-        let client = InferenceGatewayClient::new(&base_url);
-        let response = client.get_agent("agent-123").await?;
-
-        assert_eq!(response.id, "agent-123");
-        assert_eq!(response.name, "Test Agent");
-        assert_eq!(response.url, "http://test-agent:8080");
-        mock.assert();
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn test_get_agent_not_found() -> Result<(), GatewayError> {
-        let mut server = Server::new_async().await;
-
-        let mock = server
-            .mock("GET", "/v1/a2a/agents/non-existent")
-            .with_status(404)
-            .with_header("content-type", "application/json")
-            .with_body(r#"{"error":"Agent not found"}"#)
-            .create();
-
-        let base_url = format!("{}/v1", server.url());
-        let client = InferenceGatewayClient::new(&base_url);
-
-        match client.get_agent("non-existent").await {
-            Err(GatewayError::NotFound(msg)) => {
-                assert_eq!(msg, "Agent not found");
-            }
-            _ => panic!("Expected NotFound error"),
-        }
-
-        mock.assert();
-        Ok(())
-    }
 }
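The series nets out to typed finish reasons, optional logprobs on both streaming and non-streaming choices, the reasoning_content/reasoning fields, a reasoning_format knob, and the ollama_cloud and mistral providers, with the A2A surface from PATCH 1 backed out again in PATCH 5. A minimal sketch of the resulting surface — assuming the crate is consumed as inference_gateway_sdk, that the pub types in the diffs are re-exported at the crate root, and that tokio and serde_json are available; the gateway URL is illustrative:

use inference_gateway_sdk::{
    ChatCompletionChoice, FinishReason, InferenceGatewayAPI, InferenceGatewayClient, Provider,
};

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // "mistral" round-trips through the TryFrom<&str> impl extended in PATCH 3.
    let provider = Provider::try_from("mistral")?;
    assert_eq!(provider.to_string(), "mistral");

    // A choice in the post-series shape: typed finish_reason plus optional logprobs.
    let raw = r#"{
        "index": 0,
        "finish_reason": "stop",
        "logprobs": null,
        "message": { "role": "assistant", "content": "Hello" }
    }"#;
    let choice: ChatCompletionChoice = serde_json::from_str(raw)?;
    match choice.finish_reason {
        FinishReason::Stop => println!("finished at a natural stop point"),
        FinishReason::Length => println!("truncated by max_tokens"),
        FinishReason::ToolCalls => println!("model requested a tool call"),
        _ => println!("content filter or deprecated function_call"),
    }
    if let Some(reasoning) = &choice.message.reasoning_content {
        println!("reasoning: {reasoning}");
    }

    // Health probe against a locally running gateway.
    let client = InferenceGatewayClient::new("http://localhost:8080/v1");
    println!("gateway healthy: {}", client.health_check().await?);
    Ok(())
}

Matching on FinishReason instead of comparing strings is the main ergonomic win: the compiler now surfaces unhandled stop conditions such as ContentFilter at the call site.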