
Commit f27a412

(fix) Streaming and Tool Calling (#29)
* fixes to various models to get tool calling and streaming working
* added missing assistant error
* cleaned up content option in streaming
1 parent ffc1403 commit f27a412

9 files changed: +123 -56 lines changed


backend/Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default.

backend/src/common/mapping/openrouter.rs

Lines changed: 1 addition & 1 deletion
@@ -65,7 +65,7 @@ impl Into<ToolCall> for ChatCompletionRequestToolCall {
     fn into(self) -> ToolCall {
         ToolCall {
             id: Some(self.id),
-            index: self.index,
+            index: None,
             kind: Some(self.kind),
             function_call: self.function_call.into(),
         }

backend/src/common/types/chat_request.rs

Lines changed: 41 additions & 11 deletions
@@ -63,12 +63,9 @@ pub enum ChatCompletionRequestMessage{
         #[serde(skip_serializing_if = "Option::is_none")]
         name: Option<String>,
     },
-    #[serde(rename_all = "camelCase")]
     Assistant {
-        content: String,
-        #[serde(skip_serializing_if = "Option::is_none")]
+        content: Option<String>,
         tool_calls: Option<Vec<ChatCompletionRequestToolCall>>,
-        #[serde(skip_serializing_if = "Option::is_none")]
         name: Option<String>,
     },
     Tool {

@@ -89,8 +86,6 @@ pub struct ChatCompletionRequestFunctionCall {
 pub struct ChatCompletionRequestToolCall {
     /// A unique identifier for the tool call.
     pub id: String,
-    /// The index of the tool call in the list of tool calls
-    pub index: u32,
     /// The type of call. It must be "function" for function calls.
     #[serde(rename = "type")]
     pub kind: String,

@@ -124,12 +119,47 @@ pub struct ChatCompletionRequestFunctionDescription {
 // Helper Methods for easy extraction
 impl ChatCompletionRequestMessage {
     /// Returns the content of the message regardless of its role
-    pub fn content(&self) -> &str {
+    pub fn content(&self) -> Option<String> {
+        match self {
+            ChatCompletionRequestMessage::System { content, .. } => Some(content.clone()),
+            ChatCompletionRequestMessage::User { content, .. } => Some(content.clone()),
+            ChatCompletionRequestMessage::Assistant { content, .. } => content.clone().map(|c| c),
+            ChatCompletionRequestMessage::Tool { content, .. } => Some(content.clone()),
+        }
+    }
+
+    pub fn system_content(&self) -> String {
         match self {
-            ChatCompletionRequestMessage::System { content, .. } => content,
-            ChatCompletionRequestMessage::User { content, .. } => content,
-            ChatCompletionRequestMessage::Assistant { content, .. } => content,
-            ChatCompletionRequestMessage::Tool { content, .. } => content,
+            ChatCompletionRequestMessage::System { content, .. } => content.clone(),
+            _ => "".to_string()
+        }
+    }
+
+    pub fn user_content(&self) -> String {
+        match self {
+            ChatCompletionRequestMessage::User { content, .. } => content.clone(),
+            _ => "".to_string()
+        }
+    }
+
+    pub fn assistant_content(&self) -> Option<String> {
+        match self {
+            ChatCompletionRequestMessage::Assistant { content, .. } => content.clone(),
+            _ => None
+        }
+    }
+
+    pub fn tool_content(&self) -> String {
+        match self {
+            ChatCompletionRequestMessage::Tool { content, .. } => content.clone(),
+            _ => "".to_string()
+        }
+    }
+
+    pub fn tool_call_id(&self) -> Option<String> {
+        match self {
+            ChatCompletionRequestMessage::Tool { tool_call_id, .. } => Some(tool_call_id.clone()),
+            _ => None
         }
     }

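Note on the accessor change: every role now yields Option<String> from content(), so callers can no longer assume text is present on an assistant turn that only carried tool calls. (Incidentally, content.clone().map(|c| c) is equivalent to content.clone().) A minimal standalone sketch of the accessor pattern, using a simplified enum rather than the repository's actual types:

    // Simplified stand-in for ChatCompletionRequestMessage; illustrative only.
    enum Message {
        User { content: String },
        Assistant { content: Option<String> },
    }

    impl Message {
        // Mirrors the new content() shape: every role yields Option<String>.
        fn content(&self) -> Option<String> {
            match self {
                Message::User { content } => Some(content.clone()),
                Message::Assistant { content } => content.clone(),
            }
        }
    }

    fn main() {
        let user = Message::User { content: "hi".to_string() };
        assert_eq!(user.content().as_deref(), Some("hi"));

        // An assistant turn that carried only tool calls has no text content.
        let msg = Message::Assistant { content: None };
        // Callers decide how to handle the absence instead of receiving "".
        println!("{}", msg.content().unwrap_or_else(|| "<no content>".into()));
    }
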
backend/src/common/types/chat_response.rs

Lines changed: 9 additions & 3 deletions
@@ -33,20 +33,24 @@ pub struct LlmServiceChatCompletionResponseUsage {
 #[derive(Debug, Deserialize, Serialize)]
 pub struct LlmServiceChatCompletionResponseMessage {
     pub role: String,
-    pub content: String,
+
+    pub content: Option<String>,
     #[serde(skip_serializing_if = "Option::is_none")]
     pub name: Option<String>,
     // Optionally include tool_calls when the assistant message contains a tool call.
     #[serde(skip_serializing_if = "Option::is_none")]
     pub tool_calls: Option<Vec<LlmServiceChatCompletionResponseToolCall>>,
+
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub tool_call_id: Option<String>,
 }

 #[derive(Debug, Deserialize, Serialize)]
 pub struct LlmServiceChatCompletionResponseToolCall {
     /// A unique identifier for the tool call.
     pub id: Option<String>,
     /// The index of the tool call in the list of tool calls
-    pub index: u32,
+    pub index: Option<u32>,
     /// The type of call. When streaming, the first chunk only will contain "function".
     #[serde(rename = "type")]
     pub kind: Option<String>,

@@ -69,7 +73,7 @@ impl LlmServiceChatCompletionResponse {
     /// Useful for handling streamed responses which are typically simpler.
     pub fn new_streamed(
         id: String,
-        message_content: String,
+        message_content: Option<String>,
         model: String,
         created: i64,
         prompt_tokens: Option<u32>,

@@ -84,6 +88,7 @@ impl LlmServiceChatCompletionResponse {
                 content: message_content,
                 name: None,
                 tool_calls: None,
+                tool_call_id: None,
             },
             finish_reason: Some("stop".to_string()),
             native_finish_reason: None,

@@ -122,6 +127,7 @@ impl From<ChatCompletionResponse> for LlmServiceChatCompletionResponse {
                     role: choice.message.role,
                     content: choice.message.content,
                     name: choice.message.name,
+                    tool_call_id: choice.message.tool_call_id,
                     tool_calls: choice.message.tool_calls.map(|tool_calls| {
                         tool_calls.into_iter().map(|tool_call| {
                             LlmServiceChatCompletionResponseToolCall {

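The optional content, index, and new tool_call_id fields exist because streamed chunks and tool-call messages may carry no text at all. A rough sketch of how skip_serializing_if = "Option::is_none" plays out, assuming the serde/serde_json dependencies already used by these types; the struct below is a simplified stand-in, not the real LlmServiceChatCompletionResponseMessage:

    use serde::Serialize;

    // Minimal stand-in mirroring the optional fields above; illustrative only.
    #[derive(Serialize)]
    struct ResponseMessage {
        role: String,
        content: Option<String>,
        #[serde(skip_serializing_if = "Option::is_none")]
        tool_call_id: Option<String>,
    }

    fn main() {
        let msg = ResponseMessage {
            role: "assistant".to_string(),
            content: None,      // serialized as null (no skip attribute on content)
            tool_call_id: None, // omitted from the JSON entirely
        };
        // Prints: {"role":"assistant","content":null}
        println!("{}", serde_json::to_string(&msg).unwrap());
    }
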
backend/src/controllers/prompt_eval_run.rs

Lines changed: 4 additions & 1 deletion
@@ -73,10 +73,13 @@ pub async fn execute_eval_run(
         .map_err(|_| AppError::InternalServerError("Something went wrong".to_string()))?;

     if let Some(c) = res.0.choices.first() {
+        // TODO: We should make the DB field nullable so we don't have to hack this
+        let content = c.message.content.clone().map(|c| c.to_string()).unwrap_or("".to_string());
+
         let eval_run = state
             .db
             .prompt_eval_run
-            .create(&run_id, prompt_version_id, e.id, None, &c.message.content)
+            .create(&run_id, prompt_version_id, e.id, None, &content)
             .await?;

         eval_runs.push(eval_run);

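Small aside on the fallback above: since content is already an Option<String>, the .map(|c| c.to_string()) is a no-op and unwrap_or_default() yields the same empty-string fallback. A tiny equivalence check, not a change to the repository:

    fn main() {
        let content: Option<String> = None;

        // What the diff does: map to String (a no-op on String) and fall back to "".
        let a = content.clone().map(|c| c.to_string()).unwrap_or("".to_string());

        // Equivalent, shorter form.
        let b = content.clone().unwrap_or_default();

        assert_eq!(a, b);
        assert_eq!(a, "");
    }
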
backend/src/services/llm.rs

Lines changed: 12 additions & 2 deletions
@@ -45,20 +45,30 @@ impl Llm {
         let res = self.send_request().await?;

         if let Some(c) = res.0.choices.first() {
+            // We don't need to validate the tool response
+            if c.message.role == "tool" {
+                return Ok(res);
+            }
+
+            let content = match &c.message.content {
+                Some(c) => c.to_string(),
+                None => return Err(LlmError::MissingAssistantContent)
+            };
+
             // if we have a JSON schema available lets use it
             // Otherwise just make sure it's valid JSON and return
             match &self.props.request.response_format {
                 Some(rf) => {
                     match &rf.json_schema {
                         Some(js) => {
-                            let is_valid = &self.validate_schema(&c.message.content, &js.schema)?;
+                            let is_valid = &self.validate_schema(&content, &js.schema)?;
                             if !is_valid {
                                 tracing::error!("The schema was not valid");
                                 return Err(LlmError::InvalidJsonSchema);
                             }
                         },
                         None => {
-                            let _json: serde_json::Value = serde_json::from_str(&c.message.content)?;
+                            let _json: serde_json::Value = serde_json::from_str(&content)?;
                         }
                     }
                 },

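The new guard means post-processing only validates assistant text: tool-role messages pass through untouched, and an assistant message with no content becomes an explicit MissingAssistantContent error instead of an empty string. A standalone sketch of that gate, with simplified message and error types rather than the crate's real ones:

    // Simplified stand-in for the validation gate above; illustrative only.
    #[derive(Debug)]
    enum LlmError {
        MissingAssistantContent,
    }

    struct Message {
        role: String,
        content: Option<String>,
    }

    fn validated_content(msg: &Message) -> Result<Option<String>, LlmError> {
        // Tool responses are passed through without content validation.
        if msg.role == "tool" {
            return Ok(None);
        }
        // Any other role is expected to carry text content.
        match &msg.content {
            Some(c) => Ok(Some(c.clone())),
            None => Err(LlmError::MissingAssistantContent),
        }
    }

    fn main() {
        let tool = Message { role: "tool".to_string(), content: None };
        let empty_assistant = Message { role: "assistant".to_string(), content: None };

        assert!(validated_content(&tool).is_ok());
        assert!(matches!(
            validated_content(&empty_assistant),
            Err(LlmError::MissingAssistantContent)
        ));
        println!("gate behaves as expected");
    }
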
backend/src/services/providers/openrouter.rs

Lines changed: 12 additions & 5 deletions
@@ -39,8 +39,9 @@ impl<'a> OpenrouterProvider<'a> {
         let messages = self.props.request.messages.iter().map(|msg| {
             openrouter_api::types::chat::Message {
                 role: msg.role().to_string(),
-                content: msg.content().to_string(),
+                content: msg.content(),
                 name: msg.name().map(|n| n.to_string()),
+                tool_call_id: msg.tool_call_id(),
                 tool_calls: match msg {
                     ChatCompletionRequestMessage::Assistant { tool_calls, .. } => {
                         match tool_calls {

@@ -77,8 +78,9 @@ impl<'a> OpenrouterProvider<'a> {
         let messages: Vec<openrouter_api::types::chat::Message> = self.props.request.messages.iter().map(|msg| {
             openrouter_api::types::chat::Message {
                 role: msg.role().to_string(),
-                content: msg.content().to_string(),
+                content: msg.content(),
                 name: msg.name().map(|n| n.to_string()),
+                tool_call_id: msg.tool_call_id(),
                 tool_calls: match msg {
                     ChatCompletionRequestMessage::Assistant { tool_calls, .. } => {
                         match tool_calls {

@@ -105,14 +107,14 @@ impl<'a> OpenrouterProvider<'a> {
         };

         let mut stream = self.client.chat()?.chat_completion_stream(request);
-        let mut content = String::new();
+        let mut content: Option<String> = None;
         let mut prompt_tokens = 0;
         let mut completion_tokens = 0;
         let mut total_tokens = 0;
         let mut id = String::new();

         while let Some(chunk) = stream.next().await {
-            tracing::info!("chunk: {:?}", chunk);
+            tracing::debug!("chunk: {:?}", chunk);
             match chunk {
                 Ok(c) => {
                     id = c.id.clone();

@@ -125,10 +127,15 @@ impl<'a> OpenrouterProvider<'a> {

                     if let Some(c) = &c.choices.first() {
                         if let Some(c) = &c.delta.content {
-                            content += &c;
+                            match &mut content {
+                                Some(cnt) => cnt.push_str(&c),
+                                None => content = Some(c.to_string())
+                            }
                         }
                     }

+                    // TODO: Capture tool calls
+
                     if let Err(_) = tx.send(Ok(c.into())).await {
                         break;
                     }

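For the streaming loop, the accumulator is now Option<String>, so a stream that never produces a text delta ends as None rather than an empty string, which matters once the response message's content is optional. A minimal sketch of that accumulation shape; the deltas vector below is made up for illustration:

    fn main() {
        // Simulated streamed delta contents; None models chunks with no text
        // (e.g. role-only or tool-call chunks).
        let deltas: Vec<Option<&str>> = vec![None, Some("Hel"), Some("lo"), None, Some(" world")];

        // Same shape as the streaming loop above: content stays None until the
        // first text delta arrives, then later deltas are appended.
        let mut content: Option<String> = None;
        for delta in deltas.into_iter().flatten() {
            match &mut content {
                Some(cnt) => cnt.push_str(delta),
                None => content = Some(delta.to_string()),
            }
        }

        assert_eq!(content.as_deref(), Some("Hello world"));
        println!("{:?}", content);
    }
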
backend/src/services/types/llm_error.rs

Lines changed: 3 additions & 1 deletion
@@ -59,6 +59,8 @@ pub enum LlmError {
     PromptTooLong(usize, usize),
     #[error("Content policy violation: {0}")]
     ContentPolicy(String),
+    #[error("Missing assistant content when expected")]
+    MissingAssistantContent,

     // Concurrency/Task errors
     #[error("MPSC Sender failed to send message in channel: {0}")]

@@ -96,7 +98,7 @@ pub enum LlmError {
     #[error("Serialization error: {0}")]
     SerializationError(String),
     #[error("Deserialization error: {0}")]
-    DeserializationError(String)
+    DeserializationError(String),
 }

 impl From<openrouter_api::Error> for LlmError {

0 commit comments
