
Commit b552796

Handles response
Signed-off-by: Aminu Oluwaseun Joshua <[email protected]>
1 parent 543b0e7 commit b552796

4 files changed, 142 insertions(+), 16 deletions(-)

crates/factor-llm/src/spin.rs

2 additions & 2 deletions

@@ -122,7 +122,7 @@ impl LlmCompute {
             LlmCompute::RemoteHttp(config) => Arc::new(Mutex::new(RemoteHttpLlmEngine::new(
                 config.url,
                 config.auth_token,
-                config.agent,
+                config.custom_llm.and_then(|c| c.as_str().try_into().ok()),
             ))),
         };
         Ok(engine)
@@ -133,7 +133,7 @@ impl LlmCompute {
 pub struct RemoteHttpCompute {
     url: Url,
     auth_token: String,
-    agent: Option<String>,
+    custom_llm: Option<String>,
 }

 /// A noop engine used when the local engine feature is disabled.
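
In plain terms, the new wiring maps the optional `custom_llm` string from runtime config into an `Option<CustomLlm>`: `and_then` passes `None` straight through, and `try_into().ok()` quietly drops names that do not parse. A minimal standalone sketch of that conversion, mirroring the `CustomLlm` type added in lib.rs below (a plain `String` error stands in for `anyhow::Error` to keep the sketch dependency-free):

// Mirror of the CustomLlm enum and TryFrom impl added in lib.rs.
#[derive(Debug, PartialEq)]
pub enum CustomLlm {
    OpenAi,
}

impl TryFrom<&str> for CustomLlm {
    type Error = String;

    fn try_from(value: &str) -> Result<Self, Self::Error> {
        match value.to_lowercase().as_str() {
            "open_ai" | "openai" => Ok(CustomLlm::OpenAi),
            _ => Err(format!("Invalid custom LLM: {value}")),
        }
    }
}

fn main() {
    let configs: [Option<String>; 3] = [Some("OpenAI".into()), Some("llama".into()), None];
    for custom_llm in configs {
        // Same shape as `config.custom_llm.and_then(|c| c.as_str().try_into().ok())`.
        let agent: Option<CustomLlm> = custom_llm.and_then(|c| c.as_str().try_into().ok());
        println!("{agent:?}"); // Some(OpenAi), then None, then None
    }
}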

crates/llm-remote-http/src/lib.rs

87 additions & 4 deletions

@@ -3,7 +3,11 @@ use reqwest::{Client, Url};
 use serde::{Deserialize, Serialize};
 use spin_world::v2::llm::{self as wasi_llm};

-use crate::{default::DefaultAgentEngine, open_ai::OpenAIAgentEngine};
+use crate::{
+    default::DefaultAgentEngine,
+    open_ai::OpenAIAgentEngine,
+    schema::{ChatCompletionChoice, Embedding},
+};

 mod default;
 mod open_ai;
@@ -25,9 +29,9 @@ pub enum Agent {
 }

 impl Agent {
-    pub fn from(url: Url, auth_token: String, agent: Option<String>) -> Self {
+    pub fn from(url: Url, auth_token: String, agent: Option<CustomLlm>) -> Self {
         match agent {
-            Some(agent_name) if agent_name == *"open_ai" => Agent::OpenAI {
+            Some(CustomLlm::OpenAi) => Agent::OpenAI {
                 auth_token,
                 url,
                 client: None,
@@ -70,6 +74,23 @@ struct InferResponseBody {
     usage: InferUsage,
 }

+#[derive(Deserialize)]
+struct CreateChatCompletionResponse {
+    _id: String,
+    _object: String,
+    _created: u64,
+    _model: String,
+    choices: Vec<ChatCompletionChoice>,
+    usage: CompletionUsage,
+}
+
+#[derive(Deserialize)]
+struct CompletionUsage {
+    completion_tokens: u32,
+    prompt_tokens: u32,
+    _total_tokens: u32,
+}
+
 #[derive(Deserialize)]
 #[serde(rename_all(deserialize = "camelCase"))]
 struct EmbeddingUsage {
@@ -82,8 +103,31 @@ struct EmbeddingResponseBody {
     usage: EmbeddingUsage,
 }

+#[derive(Deserialize)]
+struct CreateEmbeddingResponse {
+    _object: String,
+    _model: String,
+    data: Vec<Embedding>,
+    usage: OpenAIEmbeddingUsage,
+}
+
+impl CreateEmbeddingResponse {
+    fn embeddings(&self) -> Vec<Vec<f32>> {
+        self.data
+            .iter()
+            .map(|embedding| embedding.embedding.clone())
+            .collect()
+    }
+}
+
+#[derive(Deserialize)]
+struct OpenAIEmbeddingUsage {
+    prompt_tokens: u32,
+    _total_tokens: u32,
+}
+
 impl RemoteHttpLlmEngine {
-    pub fn new(url: Url, auth_token: String, agent: Option<String>) -> Self {
+    pub fn new(url: Url, auth_token: String, agent: Option<CustomLlm>) -> Self {
         RemoteHttpLlmEngine {
             agent: Agent::from(url, auth_token, agent),
         }
@@ -166,6 +210,18 @@ impl From<InferResponseBody> for wasi_llm::InferencingResult {
     }
 }

+impl From<CreateChatCompletionResponse> for wasi_llm::InferencingResult {
+    fn from(value: CreateChatCompletionResponse) -> Self {
+        Self {
+            text: value.choices[0].message.content.clone(),
+            usage: wasi_llm::InferencingUsage {
+                prompt_token_count: value.usage.prompt_tokens,
+                generated_token_count: value.usage.completion_tokens,
+            },
+        }
+    }
+}
+
 impl From<EmbeddingResponseBody> for wasi_llm::EmbeddingsResult {
     fn from(value: EmbeddingResponseBody) -> Self {
         Self {
@@ -176,3 +232,30 @@ impl From<EmbeddingResponseBody> for wasi_llm::EmbeddingsResult {
         }
     }
 }
+
+impl From<CreateEmbeddingResponse> for wasi_llm::EmbeddingsResult {
+    fn from(value: CreateEmbeddingResponse) -> Self {
+        Self {
+            embeddings: value.embeddings(),
+            usage: wasi_llm::EmbeddingsUsage {
+                prompt_token_count: value.usage.prompt_tokens,
+            },
+        }
+    }
+}
+
+#[derive(Debug, serde::Deserialize, PartialEq)]
+pub enum CustomLlm {
+    OpenAi,
+}
+
+impl TryFrom<&str> for CustomLlm {
+    type Error = anyhow::Error;
+
+    fn try_from(value: &str) -> Result<Self, Self::Error> {
+        match value.to_lowercase().as_str() {
+            "open_ai" | "openai" => Ok(CustomLlm::OpenAi),
+            _ => Err(anyhow::anyhow!("Invalid custom LLM: {}", value)),
+        }
+    }
+}
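
This is the heart of the commit: the engine now parses an OpenAI-style chat completion body and converts it into the WASI inferencing result. A standalone sketch of that round trip (assumes serde and serde_json as dependencies; the structs are trimmed mirrors of the ones added above, keeping only the fields the From impls actually read, and serde ignores unknown JSON keys by default):

use serde::Deserialize;

#[derive(Deserialize)]
struct Message {
    content: String,
}

#[derive(Deserialize)]
struct Choice {
    message: Message,
}

#[derive(Deserialize)]
struct CompletionUsage {
    completion_tokens: u32,
    prompt_tokens: u32,
}

#[derive(Deserialize)]
struct CreateChatCompletionResponse {
    choices: Vec<Choice>,
    usage: CompletionUsage,
}

fn main() -> Result<(), serde_json::Error> {
    // Extra keys ("id", "model", ...) are ignored because they are not fields here.
    let payload = r#"{
        "id": "chatcmpl-123",
        "model": "gpt-5",
        "choices": [{ "message": { "content": "Hello!" } }],
        "usage": { "completion_tokens": 2, "prompt_tokens": 5 }
    }"#;
    let resp: CreateChatCompletionResponse = serde_json::from_str(payload)?;
    // Same mapping as From<CreateChatCompletionResponse> for InferencingResult.
    println!(
        "text={:?} prompt_tokens={} generated_tokens={}",
        resp.choices[0].message.content,
        resp.usage.prompt_tokens,
        resp.usage.completion_tokens,
    );
    Ok(())
}

Two caveats about the real structs above, as written: serde matches JSON keys to field names verbatim, so the underscore-prefixed fields expect keys spelled like "_id" unless a #[serde(rename = "...")] attribute is added, and `value.choices[0]` in the From impl will panic if `choices` comes back empty.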

crates/llm-remote-http/src/open_ai.rs

7 additions & 7 deletions

@@ -6,8 +6,8 @@ use serde::Serialize;
 use spin_world::v2::llm::{self as wasi_llm};

 use crate::{
-    schema::{EmbeddingModels, EncodingFormat, Message, Model, Role},
-    EmbeddingResponseBody, InferResponseBody,
+    schema::{EmbeddingModels, EncodingFormat, Model, Prompt, Role},
+    CreateChatCompletionResponse, EmbeddingResponseBody,
 };

 pub(crate) struct OpenAIAgentEngine;
@@ -39,11 +39,11 @@ impl OpenAIAgentEngine {
         tracing::info!("Sending remote inference request to {chat_url}");

         let body = CreateChatCompletionRequest {
-            // TODO: Joshua: make Role customizable
-            messages: vec![Message::new(Role::User, prompt)],
+            // TODO: Make Role customizable
+            messages: vec![Prompt::new(Role::User, prompt)],
             model: model.as_str().try_into()?,
             max_completion_tokens: Some(params.max_tokens),
-            frequency_penalty: Some(params.repeat_penalty), // TODO: Joshua: change to frequency_penalty
+            frequency_penalty: Some(params.repeat_penalty),
             reasoning_effort: None,
             verbosity: None,
         };
@@ -58,7 +58,7 @@ impl OpenAIAgentEngine {
                 wasi_llm::Error::RuntimeError(format!("POST /infer request error: {err}"))
             })?;

-        match resp.json::<InferResponseBody>().await {
+        match resp.json::<CreateChatCompletionResponse>().await {
             Ok(val) => Ok(val.into()),
             Err(err) => Err(wasi_llm::Error::RuntimeError(format!(
                 "Failed to deserialize response for \"POST /index\": {err}"
@@ -118,7 +118,7 @@ impl OpenAIAgentEngine {

 #[derive(Serialize, Debug)]
 struct CreateChatCompletionRequest {
-    messages: Vec<Message>,
+    messages: Vec<Prompt>,
     model: Model,
     #[serde(skip_serializing_if = "Option::is_none")]
    max_completion_tokens: Option<u32>,
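
On the request side, `CreateChatCompletionRequest` serializes directly into the OpenAI-style chat completions body, with `skip_serializing_if` dropping unset optional fields from the JSON. A rough standalone sketch of that behavior (role and model are simplified to plain strings in place of the crate's `Role` and `Model` enums, the skip attribute on `frequency_penalty` is assumed to match the one shown on `max_completion_tokens`, and "gpt-5" is just an illustrative model string):

use serde::Serialize;

#[derive(Serialize)]
struct Prompt {
    role: String,
    content: String,
}

#[derive(Serialize)]
struct CreateChatCompletionRequest {
    messages: Vec<Prompt>,
    model: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    max_completion_tokens: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    frequency_penalty: Option<f32>,
}

fn main() {
    let body = CreateChatCompletionRequest {
        messages: vec![Prompt {
            role: "user".into(),
            content: "Say hello".into(),
        }],
        model: "gpt-5".into(),
        max_completion_tokens: Some(100),
        frequency_penalty: None,
    };
    // frequency_penalty is None, so it is omitted from the serialized body entirely.
    println!("{}", serde_json::to_string_pretty(&body).unwrap());
}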

crates/llm-remote-http/src/schema.rs

46 additions & 3 deletions

@@ -1,8 +1,9 @@
 use std::fmt::Display;

-use serde::Serialize;
+use serde::{Deserialize, Serialize};
 use spin_world::v2::llm as wasi_llm;

+/// LLM model
 #[derive(Serialize, Debug)]
 pub enum Model {
     GPT5,
@@ -69,12 +70,12 @@ impl Display for Model {
 }

 #[derive(Serialize, Debug)]
-pub struct Message {
+pub struct Prompt {
     role: Role,
     content: String,
 }

-impl Message {
+impl Prompt {
     pub fn new(role: Role, content: String) -> Self {
         Self { role, content }
     }
@@ -232,3 +233,45 @@ impl TryFrom<&str> for Verbosity {
         }
     }
 }
+
+#[derive(Deserialize)]
+pub struct ChatCompletionChoice {
+    /// The index of the choice in the list of choices.
+    _index: u32,
+    pub message: ChatCompletionResponseMessage,
+    /// The reason the model stopped generating tokens. This will be `stop` if the model hit a
+    /// natural stop point or a provided stop sequence.
+    _finish_reason: String,
+    /// Log probability information for the choice.
+    _logprobs: Option<Logprobs>,
+}
+
+/// A chat completion message generated by the model.
+#[derive(Deserialize)]
+pub struct ChatCompletionResponseMessage {
+    /// The role of the author of this message.
+    _role: String,
+    /// The contents of the message.
+    pub content: String,
+    /// The refusal message generated by the model.
+    _refusal: Option<String>,
+}
+
+#[derive(Deserialize)]
+pub struct Logprobs {
+    /// A list of message content tokens with log probability information.
+    _content: Option<Vec<String>>,
+    /// A list of message refusal tokens with log probability information.
+    _refusal: Option<Vec<String>>,
+}
+
+#[derive(Deserialize)]
+pub struct Embedding {
+    /// The index of the embedding in the list of embeddings.
+    _index: u32,
+    /// The embedding vector, which is a list of floats. The length of the vector depends on the
+    /// model, as listed in the [embedding guide](https://platform.openai.com/docs/guides/embeddings).
+    pub embedding: Vec<f32>,
+    /// The object type, which is always "embedding".
+    _object: String,
+}
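
These schema types feed the `CreateEmbeddingResponse::embeddings()` helper in lib.rs, which flattens the response's `data` array into a `Vec<Vec<f32>>`. A rough standalone sketch of that flattening (assumes serde and serde_json; trimmed mirrors of `CreateEmbeddingResponse` and `Embedding` keeping only the consumed fields):

use serde::Deserialize;

#[derive(Deserialize)]
struct Embedding {
    embedding: Vec<f32>,
}

#[derive(Deserialize)]
struct CreateEmbeddingResponse {
    data: Vec<Embedding>,
}

impl CreateEmbeddingResponse {
    // Same shape as the embeddings() helper added in lib.rs.
    fn embeddings(&self) -> Vec<Vec<f32>> {
        self.data.iter().map(|e| e.embedding.clone()).collect()
    }
}

fn main() -> Result<(), serde_json::Error> {
    let payload = r#"{
        "data": [
            { "embedding": [1.0, 2.0] },
            { "embedding": [3.0, 4.0] }
        ]
    }"#;
    let resp: CreateEmbeddingResponse = serde_json::from_str(payload)?;
    // One inner vector per embedding object, in response order.
    assert_eq!(resp.embeddings(), vec![vec![1.0, 2.0], vec![3.0, 4.0]]);
    Ok(())
}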
