
Commit 1ba24e3

Link with llm v2
Signed-off-by: Ryan Levick <[email protected]>
1 parent 561b3e1 commit 1ba24e3

File tree

6 files changed: 99 additions & 19 deletions

crates/llm-local/src/lib.rs

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@ use llm::{
 use rand::SeedableRng;
 use spin_core::async_trait;
 use spin_llm::{LlmEngine, MODEL_ALL_MINILM_L6_V2};
-use spin_world::v1::llm::{self as wasi_llm};
+use spin_world::v2::llm::{self as wasi_llm};
 use std::{
     collections::hash_map::Entry,
     collections::HashMap,
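
Note on the alias: because the v2 module is re-imported under the old name (`self as wasi_llm`), the version bump is confined to this one import line, and every existing `wasi_llm::...` call site in the file compiles unchanged. The same one-line swap appears in crates/llm-remote-http and crates/trigger below. A minimal sketch of the effect, using the InferencingParams fields listed in this commit's conversion code (the values for temperature, top_k, and top_p are illustrative, not taken from this diff):

use spin_world::v2::llm::{self as wasi_llm};

// Illustrative only: this function body is identical whether `wasi_llm`
// points at the v1 or the v2 module, which is the point of the alias.
fn example_params() -> wasi_llm::InferencingParams {
    wasi_llm::InferencingParams {
        max_tokens: 100,                       // default used by LlmDispatch below
        repeat_penalty: 1.1,                   // default used by LlmDispatch below
        repeat_penalty_last_n_token_count: 64, // default used by LlmDispatch below
        temperature: 0.8,                      // illustrative value
        top_k: 40,                             // illustrative value
        top_p: 0.9,                            // illustrative value
    }
}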

crates/llm-remote-http/src/lib.rs

Lines changed: 1 addition & 1 deletion
@@ -7,7 +7,7 @@ use serde::{Deserialize, Serialize};
 use serde_json::json;
 use spin_core::async_trait;
 use spin_llm::LlmEngine;
-use spin_world::v1::llm::{self as wasi_llm};
+use spin_world::v2::llm::{self as wasi_llm};
 
 #[derive(Clone)]
 pub struct RemoteHttpLlmEngine {

crates/llm/src/host_component.rs

Lines changed: 2 additions & 1 deletion
@@ -25,7 +25,8 @@ impl HostComponent for LlmComponent {
         linker: &mut spin_core::Linker<T>,
         get: impl Fn(&mut spin_core::Data<T>) -> &mut Self::Data + Send + Sync + Copy + 'static,
     ) -> anyhow::Result<()> {
-        spin_world::v1::llm::add_to_linker(linker, get)
+        spin_world::v1::llm::add_to_linker(linker, get)?;
+        spin_world::v2::llm::add_to_linker(linker, get)
     }
 
     fn build_data(&self) -> Self::Data {
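
The functional change is the trailing `?`: the v1 registration used to be the method's sole return value; now a v1 failure propagates early and the v2 registration result is returned, leaving both interfaces wired into one linker so guests built against either world can instantiate. A self-contained sketch of that shape with hypothetical helpers (not Spin APIs):

// Hypothetical illustration of chained registration: each step returns
// anyhow::Result<()>, the first is `?`-ed, the last is returned as-is.
fn register(name: &str) -> anyhow::Result<()> {
    println!("registering {name}");
    Ok(())
}

fn register_both() -> anyhow::Result<()> {
    register("v1::llm")?; // a failure here propagates immediately
    register("v2::llm")   // otherwise the v2 result becomes the return value
}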

crates/llm/src/lib.rs

Lines changed: 44 additions & 15 deletions
@@ -2,7 +2,8 @@ pub mod host_component;
 
 use spin_app::MetadataKey;
 use spin_core::async_trait;
-use spin_world::v1::llm::{self as wasi_llm};
+use spin_world::v1::llm::{self as v1};
+use spin_world::v2::llm::{self as v2};
 use std::collections::HashSet;
 
 pub use crate::host_component::LlmComponent;
@@ -14,16 +15,16 @@ pub const AI_MODELS_KEY: MetadataKey<HashSet<String>> = MetadataKey::new("ai_mod
 pub trait LlmEngine: Send + Sync {
     async fn infer(
         &mut self,
-        model: wasi_llm::InferencingModel,
+        model: v1::InferencingModel,
         prompt: String,
-        params: wasi_llm::InferencingParams,
-    ) -> Result<wasi_llm::InferencingResult, wasi_llm::Error>;
+        params: v2::InferencingParams,
+    ) -> Result<v2::InferencingResult, v2::Error>;
 
     async fn generate_embeddings(
         &mut self,
-        model: wasi_llm::EmbeddingModel,
+        model: v2::EmbeddingModel,
         data: Vec<String>,
-    ) -> Result<wasi_llm::EmbeddingsResult, wasi_llm::Error>;
+    ) -> Result<v2::EmbeddingsResult, v2::Error>;
 }
 
 pub struct LlmDispatch {
@@ -32,13 +33,13 @@ pub struct LlmDispatch {
 }
 
 #[async_trait]
-impl wasi_llm::Host for LlmDispatch {
+impl v2::Host for LlmDispatch {
     async fn infer(
         &mut self,
-        model: wasi_llm::InferencingModel,
+        model: v2::InferencingModel,
         prompt: String,
-        params: Option<wasi_llm::InferencingParams>,
-    ) -> anyhow::Result<Result<wasi_llm::InferencingResult, wasi_llm::Error>> {
+        params: Option<v2::InferencingParams>,
+    ) -> anyhow::Result<Result<v2::InferencingResult, v2::Error>> {
         if !self.allowed_models.contains(&model) {
             return Ok(Err(access_denied_error(&model)));
         }
@@ -47,7 +48,7 @@ impl wasi_llm::Host for LlmDispatch {
             .infer(
                 model,
                 prompt,
-                params.unwrap_or(wasi_llm::InferencingParams {
+                params.unwrap_or(v2::InferencingParams {
                     max_tokens: 100,
                     repeat_penalty: 1.1,
                     repeat_penalty_last_n_token_count: 64,
@@ -61,18 +62,46 @@ impl wasi_llm::Host for LlmDispatch {
 
     async fn generate_embeddings(
         &mut self,
-        m: wasi_llm::EmbeddingModel,
+        m: v1::EmbeddingModel,
         data: Vec<String>,
-    ) -> anyhow::Result<Result<wasi_llm::EmbeddingsResult, wasi_llm::Error>> {
+    ) -> anyhow::Result<Result<v2::EmbeddingsResult, v2::Error>> {
         if !self.allowed_models.contains(&m) {
             return Ok(Err(access_denied_error(&m)));
         }
         Ok(self.engine.generate_embeddings(m, data).await)
     }
 }
 
-fn access_denied_error(model: &str) -> wasi_llm::Error {
-    wasi_llm::Error::InvalidInput(format!(
+#[async_trait]
+impl v1::Host for LlmDispatch {
+    async fn infer(
+        &mut self,
+        model: v1::InferencingModel,
+        prompt: String,
+        params: Option<v1::InferencingParams>,
+    ) -> anyhow::Result<Result<v1::InferencingResult, v1::Error>> {
+        Ok(
+            <Self as v2::Host>::infer(self, model, prompt, params.map(Into::into))
+                .await?
+                .map(Into::into)
+                .map_err(Into::into),
+        )
+    }
+
+    async fn generate_embeddings(
+        &mut self,
+        model: v1::EmbeddingModel,
+        data: Vec<String>,
+    ) -> anyhow::Result<Result<v1::EmbeddingsResult, v1::Error>> {
+        Ok(<Self as v2::Host>::generate_embeddings(self, model, data)
+            .await?
+            .map(Into::into)
+            .map_err(Into::into))
+    }
+}
+
+fn access_denied_error(model: &str) -> v2::Error {
+    v2::Error::InvalidInput(format!(
         "The component does not have access to use '{model}'. To give the component access, add '{model}' to the 'ai_models' key for the component in your spin.toml manifest"
     ))
 }
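
Two details here are easy to miss. First, the nested return type separates failure domains: the outer anyhow::Result is a host-level failure that traps the guest, while the inner Result carries errors the guest is meant to observe, which is why the access-control check returns Ok(Err(..)) rather than Err(..). Second, the new v1::Host impl is a pure shim: it forwards to the v2::Host methods and downgrades the results with the From impls added in crates/world/src/conversions.rs, so the model allow-list is enforced in exactly one place. Restating the error layering from the hunk above with comments:

// Outer Result: host failure, surfaced to the runtime as a trap.
// Inner Result: guest-visible llm error. Denied access is something the
// guest should see and handle, so it travels on the inner layer:
if !self.allowed_models.contains(&model) {
    return Ok(Err(access_denied_error(&model)));
}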

crates/trigger/src/runtime_config/llm.rs

Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,7 @@
 use async_trait::async_trait;
 use spin_llm::LlmEngine;
 use spin_llm_remote_http::RemoteHttpLlmEngine;
-use spin_world::v1::llm as wasi_llm;
+use spin_world::v2::llm as wasi_llm;
 use url::Url;
 
 #[derive(Default)]

crates/world/src/conversions.rs

Lines changed: 50 additions & 0 deletions
@@ -168,3 +168,53 @@ mod redis {
         }
     }
 }
+
+mod llm {
+    use super::*;
+
+    impl From<v1::llm::InferencingParams> for v2::llm::InferencingParams {
+        fn from(value: v1::llm::InferencingParams) -> Self {
+            Self {
+                max_tokens: value.max_tokens,
+                repeat_penalty: value.repeat_penalty,
+                repeat_penalty_last_n_token_count: value.repeat_penalty_last_n_token_count,
+                temperature: value.temperature,
+                top_k: value.top_k,
+                top_p: value.top_p,
+            }
+        }
+    }
+
+    impl From<v2::llm::InferencingResult> for v1::llm::InferencingResult {
+        fn from(value: v2::llm::InferencingResult) -> Self {
+            Self {
+                text: value.text,
+                usage: v1::llm::InferencingUsage {
+                    prompt_token_count: value.usage.prompt_token_count,
+                    generated_token_count: value.usage.generated_token_count,
+                },
+            }
+        }
+    }
+
+    impl From<v2::llm::EmbeddingsResult> for v1::llm::EmbeddingsResult {
+        fn from(value: v2::llm::EmbeddingsResult) -> Self {
+            Self {
+                embeddings: value.embeddings,
+                usage: v1::llm::EmbeddingsUsage {
+                    prompt_token_count: value.usage.prompt_token_count,
+                },
+            }
+        }
+    }
+
+    impl From<v2::llm::Error> for v1::llm::Error {
+        fn from(value: v2::llm::Error) -> Self {
+            match value {
+                v2::llm::Error::ModelNotSupported => Self::ModelNotSupported,
+                v2::llm::Error::RuntimeError(s) => Self::RuntimeError(s),
+                v2::llm::Error::InvalidInput(s) => Self::InvalidInput(s),
+            }
+        }
+    }
+}
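
These From impls are what let the v1 shim in crates/llm/src/lib.rs write `params.map(Into::into)` and `.map(Into::into).map_err(Into::into)` instead of converting field by field at every call site. A hypothetical round-trip check, assuming the Rust field names generated from the WIT records (this test is not part of the commit):

#[test]
fn v2_inferencing_result_downgrades_to_v1() {
    let v2_result = v2::llm::InferencingResult {
        text: "hello".to_string(),
        usage: v2::llm::InferencingUsage {
            prompt_token_count: 3,
            generated_token_count: 1,
        },
    };
    // Downgrade via the From impl above and check the fields carry over.
    let v1_result: v1::llm::InferencingResult = v2_result.into();
    assert_eq!(v1_result.text, "hello");
    assert_eq!(v1_result.usage.generated_token_count, 1);
}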
