Skip to content

Commit 0296692

Browse files
authored
fix(llm, llamacpp): Loosen model ID validation (#163)
The validation logic for model IDs was too strict to support model names often used in the Llama.cpp ecosystem. For example, a model ID like `llamacpp/bartowski_Qwen2.5-7B-Instruct-GGUF_Qwen2.5-7B-Instruct-Q4_K_M.gguf` was previously rejected, but can now be used with JP.

Additionally, the model listing implementation was inadvertently left out of the implementation of the Llamacpp provider. This commit fixes that oversight.

It should be noted that it is not possible (unless the llama.cpp server is run with the `--alias` flag[1]) to get the proper model name from the API (instead, it points to the model file on disk), so in reality this implementation isn't particularly useful, but at least it doesn't panic anymore.

[1]: https://github.com/ggml-org/llama.cpp/blob/8846aace4934ad29651ea61b8c7e3f6b0556e3d2/tools/server/README.md#get-v1models-openai-compatible-model-info-api

Signed-off-by: Jean Mertz <git@jeanmertz.com>
1 parent ff4ea97 commit 0296692

File tree

5 files changed

+115
-63
lines changed

5 files changed

+115
-63
lines changed

crates/jp_llm/src/provider/llamacpp.rs

Lines changed: 66 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use jp_conversation::{
88
AssistantMessage, MessagePair, UserMessage,
99
};
1010
use jp_mcp::tool::{self, ToolChoice};
11-
use jp_model::ModelId;
11+
use jp_model::{ModelId, ProviderId};
1212
use jp_query::query::ChatQuery;
1313
use openai::{
1414
chat::{
@@ -22,25 +22,24 @@ use openai::{
2222
use serde::Serialize;
2323
use tracing::{debug, trace};
2424

25-
use super::{CompletionChunk, Delta, EventStream, ModelDetails, StreamEvent};
25+
use super::{
26+
openai::{ModelListResponse, ModelResponse},
27+
CompletionChunk, Delta, EventStream, ModelDetails, StreamEvent,
28+
};
2629
use crate::{
2730
error::{Error, Result},
2831
provider::{handle_delta, AccumulationState, Provider, ReasoningExtractor},
2932
};
3033

3134
#[derive(Debug, Clone)]
3235
pub struct Llamacpp {
36+
reqwest_client: reqwest::Client,
3337
credentials: Credentials,
38+
base_url: String,
3439
}
3540

3641
impl Llamacpp {
37-
fn new(base_url: String) -> Self {
38-
let credentials = Credentials::new("", base_url);
39-
40-
Self { credentials }
41-
}
42-
43-
/// Build request for Openai API.
42+
/// Build request for Llama.cpp API.
4443
fn build_request(
4544
&self,
4645
model_id: &ModelId,
@@ -63,7 +62,7 @@ impl Llamacpp {
6362
slug,
6463
messages_size = messages.len(),
6564
tools_size = tools.len(),
66-
"Built Openai request."
65+
"Built Llamacpp request."
6766
);
6867

6968
Ok(ChatCompletionDelta::builder(slug, messages)
@@ -76,7 +75,18 @@ impl Llamacpp {
7675
#[async_trait]
7776
impl Provider for Llamacpp {
7877
async fn models(&self) -> Result<Vec<ModelDetails>> {
79-
todo!()
78+
Ok(self
79+
.reqwest_client
80+
.get(format!("{}/v1/models", self.base_url))
81+
.send()
82+
.await?
83+
.error_for_status()?
84+
.json::<ModelListResponse>()
85+
.await?
86+
.data
87+
.iter()
88+
.map(map_model)
89+
.collect())
8090
}
8191

8292
async fn chat_completion_stream(
@@ -182,13 +192,34 @@ fn map_content(
182192
events
183193
}
184194

195+
fn map_model(model: &ModelResponse) -> ModelDetails {
196+
ModelDetails {
197+
provider: ProviderId::Llamacpp,
198+
slug: model
199+
.id
200+
.rsplit_once('/')
201+
.map_or(model.id.as_str(), |(_, v)| v)
202+
.to_string(),
203+
context_window: None,
204+
max_output_tokens: None,
205+
reasoning: None,
206+
knowledge_cutoff: None,
207+
}
208+
}
209+
185210
impl TryFrom<&assistant::provider::llamacpp::Llamacpp> for Llamacpp {
186211
type Error = Error;
187212

188213
fn try_from(config: &assistant::provider::llamacpp::Llamacpp) -> Result<Self> {
214+
let reqwest_client = reqwest::Client::builder().build()?;
189215
let base_url = config.base_url.clone();
216+
let credentials = Credentials::new("", &base_url);
190217

191-
Ok(Llamacpp::new(base_url))
218+
Ok(Llamacpp {
219+
reqwest_client,
220+
credentials,
221+
base_url,
222+
})
192223
}
193224
}
194225

@@ -461,41 +492,29 @@ mod tests {
461492
Vcr::new("http://127.0.0.1:8080", fixtures)
462493
}
463494

464-
// #[test(tokio::test)]
465-
// async fn test_llamacpp_models() -> std::result::Result<(), Box<dyn std::error::Error>> {
466-
// let mut config =
467-
// assistant::Assistant::from_partial(assistant::AssistantPartial::default_values())
468-
// .unwrap()
469-
// .provider
470-
// .openai;
471-
//
472-
// let vcr = vcr();
473-
// vcr.cassette(
474-
// function_name!(),
475-
// |rule| {
476-
// rule.filter(|when| {
477-
// when.any_request();
478-
// });
479-
// },
480-
// |recording, url| async move {
481-
// config.base_url = url;
482-
// if !recording {
483-
// // dummy api key value when replaying a cassette
484-
// config.api_key_env = "USER".to_owned();
485-
// }
486-
//
487-
// Openai::try_from(&config)
488-
// .unwrap()
489-
// .models()
490-
// .await
491-
// .map(|mut v| {
492-
// v.truncate(10);
493-
// v
494-
// })
495-
// },
496-
// )
497-
// .await
498-
// }
495+
#[test(tokio::test)]
496+
async fn test_llamacpp_models() -> std::result::Result<(), Box<dyn std::error::Error>> {
497+
let mut config =
498+
assistant::Assistant::from_partial(assistant::AssistantPartial::default_values())
499+
.unwrap()
500+
.provider
501+
.llamacpp;
502+
503+
let vcr = vcr();
504+
vcr.cassette(
505+
function_name!(),
506+
|rule| {
507+
rule.filter(|when| {
508+
when.any_request();
509+
});
510+
},
511+
|_, url| async move {
512+
config.base_url = url;
513+
Llamacpp::try_from(&config).unwrap().models().await
514+
},
515+
)
516+
.await
517+
}
499518

500519
#[test(tokio::test)]
501520
async fn test_llamacpp_chat_completion() -> std::result::Result<(), Box<dyn std::error::Error>>

crates/jp_llm/src/provider/openai.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -146,15 +146,15 @@ impl Provider for Openai {
146146

147147
#[derive(Debug, Deserialize)]
148148
#[expect(dead_code)]
149-
struct ModelListResponse {
149+
pub(crate) struct ModelListResponse {
150150
object: String,
151-
data: Vec<ModelResponse>,
151+
pub data: Vec<ModelResponse>,
152152
}
153153

154154
#[derive(Debug, Deserialize)]
155155
#[expect(dead_code)]
156-
struct ModelResponse {
157-
id: String,
156+
pub(crate) struct ModelResponse {
157+
pub id: String,
158158
object: String,
159159
#[serde(with = "time::serde::timestamp")]
160160
created: OffsetDateTime,
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
---
2+
source: crates/jp_test/src/mock.rs
3+
expression: expr
4+
---
5+
Ok(
6+
[
7+
ModelDetails {
8+
provider: Llamacpp,
9+
slug: "bartowski_Qwen2.5-7B-Instruct-GGUF_Qwen2.5-7B-Instruct-Q4_K_M.gguf",
10+
context_window: None,
11+
max_output_tokens: None,
12+
reasoning: None,
13+
knowledge_cutoff: None,
14+
},
15+
],
16+
)
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
when:
2+
path: /v1/models
3+
method: GET
4+
then:
5+
status: 200
6+
header:
7+
- name: content-type
8+
value: application/json; charset=utf-8
9+
body: "{\"models\":[{\"name\":\"/llama.cpp/bartowski_Qwen2.5-7B-Instruct-GGUF_Qwen2.5-7B-Instruct-Q4_K_M.gguf\",\"model\":\"/llama.cpp/bartowski_Qwen2.5-7B-Instruct-GGUF_Qwen2.5-7B-Instruct-Q4_K_M.gguf\",\"modified_at\":\"\",\"size\":\"\",\"digest\":\"\",\"type\":\"model\",\"description\":\"\",\"tags\":[\"\"],\"capabilities\":[\"completion\"],\"parameters\":\"\",\"details\":{\"parent_model\":\"\",\"format\":\"gguf\",\"family\":\"\",\"families\":[\"\"],\"parameter_size\":\"\",\"quantization_level\":\"\"}}],\"object\":\"list\",\"data\":[{\"id\":\"/llama.cpp/bartowski_Qwen2.5-7B-Instruct-GGUF_Qwen2.5-7B-Instruct-Q4_K_M.gguf\",\"object\":\"model\",\"created\":1751010042,\"owned_by\":\"llamacpp\",\"meta\":{\"vocab_type\":2,\"n_vocab\":152064,\"n_ctx_train\":32768,\"n_embd\":3584,\"n_params\":7615616512,\"size\":4677120000}}]}"

crates/jp_model/src/lib.rs

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -115,19 +115,26 @@ impl FromStr for ModelId {
115115
type Err = Error;
116116

117117
fn from_str(s: &str) -> Result<Self> {
118-
let (provider, name) = s.split_once('/').unwrap_or(("", s));
119-
120-
if name.chars().any(|c| {
121-
!(c.is_numeric()
122-
|| (c.is_ascii_alphabetic() && c.is_ascii_lowercase())
123-
|| c == '-'
124-
|| c == '_'
125-
|| c == '.'
126-
|| c == ':'
127-
|| c == '/')
128-
}) {
118+
let (provider, name) =
119+
s.split_once('/')
120+
.map(|(p, n)| (p.trim(), n.trim()))
121+
.ok_or(Error::InvalidIdFormat(
122+
"ID must match <provider>/<model>".to_owned(),
123+
))?;
124+
125+
if name.is_empty()
126+
|| name.chars().any(|c| {
127+
!(c.is_numeric()
128+
|| c.is_ascii_alphabetic()
129+
|| c == '-'
130+
|| c == '_'
131+
|| c == '.'
132+
|| c == ':'
133+
|| c == '/')
134+
})
135+
{
129136
return Err(Error::InvalidIdFormat(
130-
"Model ID must be [a-z0-9_-.:/]".to_string(),
137+
"Model ID must be [a-zA-Z0-9_-.:/]+".to_string(),
131138
));
132139
}
133140

@@ -197,6 +204,7 @@ impl FromStr for ProviderId {
197204
"openai" => Ok(Self::Openai),
198205
"openrouter" => Ok(Self::Openrouter),
199206
"ollama" => Ok(Self::Ollama),
207+
_ if s.is_empty() => Err(Error::InvalidProviderId("<empty>".to_owned())),
200208
_ => Err(Error::InvalidProviderId(s.to_owned())),
201209
}
202210
}

0 commit comments

Comments (0)