Skip to content

Commit 752939c

Browse files
authored
Merge pull request #2748 from fermyon/llm-features
Put local llm behind feature flags like they used to be.
2 parents 6db2872 + 85b55a3 commit 752939c

File tree

5 files changed

+94
-34
lines changed

5 files changed

+94
-34
lines changed

Cargo.toml

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -113,10 +113,9 @@ wit-component = "0.19.0"
113113
# TODO(factors): default = ["llm"]
114114
all-tests = ["extern-dependencies-tests"]
115115
extern-dependencies-tests = []
116-
# TODO(factors):
117-
# llm = ["spin-trigger-http/llm"]
118-
# llm-metal = ["llm", "spin-trigger-http/llm-metal"]
119-
# llm-cublas = ["llm", "spin-trigger-http/llm-cublas"]
116+
llm = ["spin-trigger-http2/llm"]
117+
llm-metal = ["llm", "spin-trigger-http2/llm-metal"]
118+
llm-cublas = ["llm", "spin-trigger-http2/llm-cublas"]
120119

121120
[workspace]
122121
members = [

crates/factor-llm/Cargo.toml

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,19 +8,24 @@ homepage.workspace = true
88
repository.workspace = true
99
rust-version.workspace = true
1010

11+
[features]
12+
llm = ["spin-llm-local"]
13+
llm-metal = ["llm", "spin-llm-local/metal"]
14+
llm-cublas = ["llm", "spin-llm-local/cublas"]
15+
1116
[dependencies]
1217
anyhow = "1.0"
1318
async-trait = "0.1"
1419
serde = "1.0"
1520
spin-factors = { path = "../factors" }
16-
spin-llm-local = { path = "../llm-local" }
21+
spin-llm-local = { path = "../llm-local", optional = true }
1722
spin-llm-remote-http = { path = "../llm-remote-http" }
1823
spin-locked-app = { path = "../locked-app" }
1924
spin-world = { path = "../world" }
2025
tracing = { workspace = true }
2126
tokio = { version = "1", features = ["sync"] }
2227
toml = "0.8"
23-
url = "2"
28+
url = { version = "2", features = ["serde"] }
2429

2530
[dev-dependencies]
2631
spin-factors-test = { path = "../factors-test" }

crates/factor-llm/src/spin.rs

Lines changed: 74 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
use std::path::PathBuf;
22
use std::sync::Arc;
33

4-
pub use spin_llm_local::LocalLlmEngine;
5-
64
use spin_llm_remote_http::RemoteHttpLlmEngine;
75
use spin_world::async_trait;
86
use spin_world::v1::llm::{self as v1};
@@ -12,26 +10,48 @@ use url::Url;
1210

1311
use crate::{LlmEngine, LlmEngineCreator, RuntimeConfig};
1412

15-
#[async_trait]
16-
impl LlmEngine for LocalLlmEngine {
17-
async fn infer(
18-
&mut self,
19-
model: v1::InferencingModel,
20-
prompt: String,
21-
params: v2::InferencingParams,
22-
) -> Result<v2::InferencingResult, v2::Error> {
23-
self.infer(model, prompt, params).await
24-
}
13+
#[cfg(feature = "llm")]
14+
mod local {
15+
use super::*;
16+
pub use spin_llm_local::LocalLlmEngine;
2517

26-
async fn generate_embeddings(
27-
&mut self,
28-
model: v2::EmbeddingModel,
29-
data: Vec<String>,
30-
) -> Result<v2::EmbeddingsResult, v2::Error> {
31-
self.generate_embeddings(model, data).await
18+
#[async_trait]
19+
impl LlmEngine for LocalLlmEngine {
20+
async fn infer(
21+
&mut self,
22+
model: v2::InferencingModel,
23+
prompt: String,
24+
params: v2::InferencingParams,
25+
) -> Result<v2::InferencingResult, v2::Error> {
26+
self.infer(model, prompt, params).await
27+
}
28+
29+
async fn generate_embeddings(
30+
&mut self,
31+
model: v2::EmbeddingModel,
32+
data: Vec<String>,
33+
) -> Result<v2::EmbeddingsResult, v2::Error> {
34+
self.generate_embeddings(model, data).await
35+
}
3236
}
3337
}
3438

39+
/// The default engine creator for the LLM factor when used in the Spin CLI.
40+
pub fn default_engine_creator(
41+
state_dir: PathBuf,
42+
use_gpu: bool,
43+
) -> impl LlmEngineCreator + 'static {
44+
#[cfg(feature = "llm")]
45+
let engine = spin_llm_local::LocalLlmEngine::new(state_dir.join("ai-models"), use_gpu);
46+
#[cfg(not(feature = "llm"))]
47+
let engine = {
48+
let _ = (state_dir, use_gpu);
49+
noop::NoopLlmEngine
50+
};
51+
let engine = Arc::new(Mutex::new(engine)) as Arc<Mutex<dyn LlmEngine>>;
52+
move || engine.clone()
53+
}
54+
3555
#[async_trait]
3656
impl LlmEngine for RemoteHttpLlmEngine {
3757
async fn infer(
@@ -77,6 +97,12 @@ pub enum LlmCompute {
7797
impl LlmCompute {
7898
fn into_engine(self, state_dir: PathBuf, use_gpu: bool) -> Arc<Mutex<dyn LlmEngine>> {
7999
match self {
100+
#[cfg(not(feature = "llm"))]
101+
LlmCompute::Spin => {
102+
let _ = (state_dir, use_gpu);
103+
Arc::new(Mutex::new(noop::NoopLlmEngine))
104+
}
105+
#[cfg(feature = "llm")]
80106
LlmCompute::Spin => default_engine_creator(state_dir, use_gpu).create(),
81107
LlmCompute::RemoteHttp(config) => Arc::new(Mutex::new(RemoteHttpLlmEngine::new(
82108
config.url,
@@ -92,15 +118,35 @@ pub struct RemoteHttpCompute {
92118
auth_token: String,
93119
}
94120

95-
/// The default engine creator for the LLM factor when used in the Spin CLI.
96-
pub fn default_engine_creator(
97-
state_dir: PathBuf,
98-
use_gpu: bool,
99-
) -> impl LlmEngineCreator + 'static {
100-
move || {
101-
Arc::new(Mutex::new(LocalLlmEngine::new(
102-
state_dir.join("ai-models"),
103-
use_gpu,
104-
))) as _
121+
/// A noop engine used when the local engine feature is disabled.
122+
#[cfg(not(feature = "llm"))]
123+
mod noop {
124+
use super::*;
125+
126+
#[derive(Clone, Copy)]
127+
pub(super) struct NoopLlmEngine;
128+
129+
#[async_trait]
130+
impl LlmEngine for NoopLlmEngine {
131+
async fn infer(
132+
&mut self,
133+
_model: v2::InferencingModel,
134+
_prompt: String,
135+
_params: v2::InferencingParams,
136+
) -> Result<v2::InferencingResult, v2::Error> {
137+
Err(v2::Error::RuntimeError(
138+
"Local LLM operations are not supported in this version of Spin.".into(),
139+
))
140+
}
141+
142+
async fn generate_embeddings(
143+
&mut self,
144+
_model: v2::EmbeddingModel,
145+
_data: Vec<String>,
146+
) -> Result<v2::EmbeddingsResult, v2::Error> {
147+
Err(v2::Error::RuntimeError(
148+
"Local LLM operations are not supported in this version of Spin.".into(),
149+
))
150+
}
105151
}
106152
}

crates/trigger-http2/Cargo.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,11 @@ edition = { workspace = true }
77
[lib]
88
doctest = false
99

10+
[features]
11+
llm = ["spin-trigger2/llm"]
12+
llm-metal = ["spin-trigger2/llm-metal"]
13+
llm-cublas = ["spin-trigger2/llm-cublas"]
14+
1015
[dependencies]
1116
anyhow = "1.0"
1217
async-trait = "0.1"

crates/trigger2/Cargo.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,11 @@ homepage.workspace = true
88
repository.workspace = true
99
rust-version.workspace = true
1010

11+
[features]
12+
llm = ["spin-factor-llm/llm"]
13+
llm-metal = ["spin-factor-llm/llm-metal"]
14+
llm-cublas = ["spin-factor-llm/llm-cublas"]
15+
1116
[dependencies]
1217
anyhow = "1"
1318
clap = { version = "3.1.18", features = ["derive", "env"] }

0 commit comments

Comments
 (0)