
Commit 4e98f99

feat: add model selection and fetch functionality to demo
1 parent 0aea05b commit 4e98f99

File tree

2 files changed: 96 additions & 17 deletions


demos/multimodal/README.md

Lines changed: 4 additions & 0 deletions
@@ -71,6 +71,10 @@ If you prefer not to use Docker Desktop, you can run Docker Model Runner directly
 3. **Configure the Demo**
    - **Base API**: By default set to `http://localhost:12434/engines/llama.cpp`
      - Change the port if you configured Docker Model Runner on a different port
+   - **Model**: Select from available models pulled to your Docker Model Runner
+     - The demo automatically fetches and displays all available models
+     - SmolVLM models will be auto-selected if available
+     - If model fetching fails, it falls back to `ai/smolvlm:500M-Q8_0`
    - **Instruction**: Enter what you want the model to analyze (default: "What do you see?")
      - Examples: "Describe the scene", "What objects can you see?", "What is the person doing?"
    - **Interval**: Choose how often to send requests to the model (default: 500ms)
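
To sanity-check which models the demo will see, the sketch below issues the same model-listing request that the new `fetchModels()` in demo.html uses. It is only an illustration: it assumes the default base URL from the README and the response shape implied by the diff (an array of model objects, each carrying a `tags` array).

```js
// Minimal sketch (not part of the demo): list the model tags Docker Model Runner exposes.
// Assumes the default base URL; adjust the port if you configured a different one.
const baseURL = "http://localhost:12434/engines/llama.cpp";

async function listModelTags() {
    const response = await fetch(`${baseURL}/v1/models`);
    if (!response.ok) {
        throw new Error(`HTTP ${response.status}: ${response.statusText}`);
    }
    const models = await response.json();
    // Flatten every model's tags into one list, e.g. ["ai/smolvlm:500M-Q8_0", ...]
    return models.flatMap(model => model.tags ?? []);
}

listModelTags()
    .then(tags => console.log("Available models:", tags))
    .catch(err => console.error("Model fetch failed; the demo would fall back to ai/smolvlm:500M-Q8_0:", err.message));
```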

demos/multimodal/demo.html

Lines changed: 92 additions & 17 deletions
@@ -89,7 +89,13 @@ <h1>Camera Interaction App</h1>
     <div class="io-areas">
         <div>
             <label for="baseURL">Base API:</label><br>
-            <input id="baseURL" name="Instruction" value="http://localhost:12434/engines/llama.cpp"></textarea>
+            <input id="baseURL" name="Instruction" value="http://127.0.0.1:12434/engines/llama.cpp">
+        </div>
+        <div>
+            <label for="modelSelect">Model:</label><br>
+            <select id="modelSelect" name="Model" style="width: 40em; padding: 8px;">
+                <option value="">Loading models...</option>
+            </select>
         </div>
         <div>
             <label for="instructionText">Instruction:</label><br>
@@ -118,6 +124,7 @@ <h1>Camera Interaction App</h1>
 const video = document.getElementById('videoFeed');
 const canvas = document.getElementById('canvas');
 const baseURL = document.getElementById('baseURL');
+const modelSelect = document.getElementById('modelSelect');
 const instructionText = document.getElementById('instructionText');
 const responseText = document.getElementById('responseText');
 const intervalSelect = document.getElementById('intervalSelect');
@@ -129,44 +136,110 @@ <h1>Camera Interaction App</h1>
 let intervalId;
 let isProcessing = false;

+// Fetch available models from the API
+async function fetchModels() {
+    try {
+        const response = await fetch(`${baseURL.value}/v1/models`);
+        if (!response.ok) {
+            throw new Error(`HTTP ${response.status}: ${response.statusText}`);
+        }
+        const data = await response.json();
+
+        // Clear the loading option
+        modelSelect.innerHTML = '';
+
+        if (data && data.length > 0) {
+            let totalTags = 0;
+            // Populate dropdown with available models using their tags
+            data.forEach(model => {
+                if (model.tags && model.tags.length > 0) {
+                    model.tags.forEach(tag => {
+                        const option = document.createElement('option');
+                        option.value = tag;
+                        option.textContent = tag;
+                        modelSelect.appendChild(option);
+                        totalTags++;
+                    });
+                }
+            });
+
+            if (totalTags > 0) {
+                // Try to select smolvlm model by default, or use the first option
+                const options = Array.from(modelSelect.options);
+                const smolvlmOption = options.find(opt => opt.value.toLowerCase().includes('smolvlm'));
+                if (smolvlmOption) {
+                    modelSelect.value = smolvlmOption.value;
+                } else {
+                    modelSelect.value = options[0].value;
+                }
+
+                responseText.value = `Found ${totalTags} model(s). Ready to start.`;
+            } else {
+                modelSelect.innerHTML = '<option value="">No tagged models available</option>';
+                responseText.value = "No tagged models found. Please pull a model first.";
+            }
+        } else {
+            modelSelect.innerHTML = '<option value="">No models available</option>';
+            responseText.value = "No models found. Please pull a model first.";
+        }
+    } catch (error) {
+        console.error('Error fetching models:', error);
+        modelSelect.innerHTML = '<option value="ai/smolvlm:500M-Q8_0">ai/smolvlm:500M-Q8_0 (fallback)</option>';
+        responseText.value = `Could not fetch models: ${error.message}. Using fallback model.`;
+    }
+}
+
 // Returns response text (string)
 async function sendChatCompletionRequest(instruction, imageBase64URL) {
+    const selectedModel = modelSelect.value;
+    if (!selectedModel) {
+        return "Error: No model selected";
+    }
+
     const response = await fetch(`${baseURL.value}/v1/chat/completions`, {
         method: 'POST',
         headers: {
             'Content-Type': 'application/json'
         },
         body: JSON.stringify({
+            model: selectedModel,
             max_tokens: 100,
-            model: "ai/smolvlm:500M-Q8_0",
             messages: [
-                {
-                    role: 'user', content: [
-                        {type: 'text', text: instruction},
-                        {
-                            type: 'image_url', image_url: {
+                { role: 'user', content: [
+                    { type: 'text', text: instruction },
+                    { type: 'image_url', image_url: {
                         url: imageBase64URL,
-                    }
-                }
-            ]
-        },
+                    } }
+                ] },
             ]
         })
     });
     if (!response.ok) {
-        const errorData = await response.text();
-        return `Server error: ${response.status} - ${errorData}`;
+        const errorText = await response.text();
+        try {
+            const errorData = JSON.parse(errorText);
+            // Check if error message indicates no multimodal support
+            if (errorData.error && errorData.error.message &&
+                errorData.error.message.includes('image input is not supported')) {
+                return "Error: The selected model does not support image input. Please select a vision model (e.g., SmolVLM).";
+            }
+            return `Server error: ${response.status} - ${errorData.error?.message || errorText}`;
+        } catch (e) {
+            // If JSON parse fails, use the raw text
+            return `Server error: ${response.status} - ${errorText}`;
+        }
     }
     const data = await response.json();
     return data.choices[0].message.content;
 }

-// 1. Ask for camera permission on load
+// 1. Ask for camera permission and fetch models on load
 async function initCamera() {
     try {
-        stream = await navigator.mediaDevices.getUserMedia({video: true, audio: false});
+        stream = await navigator.mediaDevices.getUserMedia({ video: true, audio: false });
         video.srcObject = stream;
-        responseText.value = "Camera access granted. Ready to start.";
+        responseText.value = "Camera access granted. Loading models...";
+        await fetchModels();
     } catch (err) {
         console.error("Error accessing camera:", err);
         responseText.value = `Error accessing camera: ${err.name} - ${err.message}. Please ensure permissions are granted and you are on HTTPS or localhost.`;
@@ -226,6 +299,7 @@ <h1>Camera Interaction App</h1>

     instructionText.disabled = true;
     intervalSelect.disabled = true;
+    modelSelect.disabled = true;

     responseText.value = "Processing started...";

@@ -250,6 +324,7 @@ <h1>Camera Interaction App</h1>

     instructionText.disabled = false;
     intervalSelect.disabled = false;
+    modelSelect.disabled = false;
     if (responseText.value.startsWith("Processing started...")) {
         responseText.value = "Processing stopped.";
     }
@@ -278,4 +353,4 @@ <h1>Camera Interaction App</h1>

     </script>
 </body>
-</html>
+</html>
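
For context on how `sendChatCompletionRequest(instruction, imageBase64URL)` gets its image argument: the frame-capture step sits outside this diff, so the sketch below is only a plausible illustration using the page's `video` and `canvas` elements. The `captureFrame` helper name and the JPEG quality value are assumptions, not the demo's actual code.

```js
// Hypothetical sketch: capture the current video frame as a base64 data URL
// and send it with the current instruction. Relies on the page's video, canvas,
// instructionText, responseText, and sendChatCompletionRequest defined above.
function captureFrame() {
    canvas.width = video.videoWidth;
    canvas.height = video.videoHeight;
    canvas.getContext('2d').drawImage(video, 0, 0);
    return canvas.toDataURL('image/jpeg', 0.8); // quality value is a guess
}

async function sendOneFrame() {
    const imageBase64URL = captureFrame();
    const reply = await sendChatCompletionRequest(instructionText.value, imageBase64URL);
    responseText.value = reply;
}
```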
