
Commit 4e98f99

feat: add model selection and fetch functionality to demo
1 parent 0aea05b commit 4e98f99

File tree

2 files changed: 96 additions & 17 deletions


demos/multimodal/README.md

Lines changed: 4 additions & 0 deletions
@@ -71,6 +71,10 @@ If you prefer not to use Docker Desktop, you can run Docker Model Runner directly
 3. **Configure the Demo**
    - **Base API**: By default set to `http://localhost:12434/engines/llama.cpp`
      - Change the port if you configured Docker Model Runner on a different port
+   - **Model**: Select from available models pulled to your Docker Model Runner
+     - The demo automatically fetches and displays all available models
+     - SmolVLM models will be auto-selected if available
+     - If model fetching fails, it falls back to `ai/smolvlm:500M-Q8_0`
    - **Instruction**: Enter what you want the model to analyze (default: "What do you see?")
      - Examples: "Describe the scene", "What objects can you see?", "What is the person doing?"
    - **Interval**: Choose how often to send requests to the model (default: 500ms)
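
To sanity-check which models the demo will see, the sketch below issues the same model-listing request that the new `fetchModels()` in demo.html uses. It is only an illustration: it assumes the default base URL from the README and the response shape implied by the diff (an array of model objects, each carrying a `tags` array).

```js
// Minimal sketch (not part of the demo): list the model tags Docker Model Runner exposes.
// Assumes the default base URL; adjust the port if you configured a different one.
const baseURL = "http://localhost:12434/engines/llama.cpp";

async function listModelTags() {
    const response = await fetch(`${baseURL}/v1/models`);
    if (!response.ok) {
        throw new Error(`HTTP ${response.status}: ${response.statusText}`);
    }
    const models = await response.json();
    // Flatten every model's tags into one list, e.g. ["ai/smolvlm:500M-Q8_0", ...]
    return models.flatMap(model => model.tags ?? []);
}

listModelTags()
    .then(tags => console.log("Available models:", tags))
    .catch(err => console.error("Model fetch failed; the demo would fall back to ai/smolvlm:500M-Q8_0:", err.message));
```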

demos/multimodal/demo.html

Lines changed: 92 additions & 17 deletions
@@ -89,7 +89,13 @@ <h1>Camera Interaction App</h1>
     <div class="io-areas">
         <div>
             <label for="baseURL">Base API:</label><br>
-            <input id="baseURL" name="Instruction" value="http://localhost:12434/engines/llama.cpp"></textarea>
+            <input id="baseURL" name="Instruction" value="http://127.0.0.1:12434/engines/llama.cpp">
+        </div>
+        <div>
+            <label for="modelSelect">Model:</label><br>
+            <select id="modelSelect" name="Model" style="width: 40em; padding: 8px;">
+                <option value="">Loading models...</option>
+            </select>
         </div>
         <div>
             <label for="instructionText">Instruction:</label><br>
@@ -118,6 +124,7 @@ <h1>Camera Interaction App</h1>
 const video = document.getElementById('videoFeed');
 const canvas = document.getElementById('canvas');
 const baseURL = document.getElementById('baseURL');
+const modelSelect = document.getElementById('modelSelect');
 const instructionText = document.getElementById('instructionText');
 const responseText = document.getElementById('responseText');
 const intervalSelect = document.getElementById('intervalSelect');
@@ -129,44 +136,110 @@ <h1>Camera Interaction App</h1>
 let intervalId;
 let isProcessing = false;

+// Fetch available models from the API
+async function fetchModels() {
+    try {
+        const response = await fetch(`${baseURL.value}/v1/models`);
+        if (!response.ok) {
+            throw new Error(`HTTP ${response.status}: ${response.statusText}`);
+        }
+        const data = await response.json();
+
+        // Clear the loading option
+        modelSelect.innerHTML = '';
+
+        if (data && data.length > 0) {
+            let totalTags = 0;
+            // Populate dropdown with available models using their tags
+            data.forEach(model => {
+                if (model.tags && model.tags.length > 0) {
+                    model.tags.forEach(tag => {
+                        const option = document.createElement('option');
+                        option.value = tag;
+                        option.textContent = tag;
+                        modelSelect.appendChild(option);
+                        totalTags++;
+                    });
+                }
+            });
+
+            if (totalTags > 0) {
+                // Try to select smolvlm model by default, or use the first option
+                const options = Array.from(modelSelect.options);
+                const smolvlmOption = options.find(opt => opt.value.toLowerCase().includes('smolvlm'));
+                if (smolvlmOption) {
+                    modelSelect.value = smolvlmOption.value;
+                } else {
+                    modelSelect.value = options[0].value;
+                }
+
+                responseText.value = `Found ${totalTags} model(s). Ready to start.`;
+            } else {
+                modelSelect.innerHTML = '<option value="">No tagged models available</option>';
+                responseText.value = "No tagged models found. Please pull a model first.";
+            }
+        } else {
+            modelSelect.innerHTML = '<option value="">No models available</option>';
+            responseText.value = "No models found. Please pull a model first.";
+        }
+    } catch (error) {
+        console.error('Error fetching models:', error);
+        modelSelect.innerHTML = '<option value="ai/smolvlm:500M-Q8_0">ai/smolvlm:500M-Q8_0 (fallback)</option>';
+        responseText.value = `Could not fetch models: ${error.message}. Using fallback model.`;
+    }
+}
+
 // Returns response text (string)
 async function sendChatCompletionRequest(instruction, imageBase64URL) {
+    const selectedModel = modelSelect.value;
+    if (!selectedModel) {
+        return "Error: No model selected";
+    }
+
     const response = await fetch(`${baseURL.value}/v1/chat/completions`, {
         method: 'POST',
         headers: {
             'Content-Type': 'application/json'
         },
         body: JSON.stringify({
+            model: selectedModel,
             max_tokens: 100,
-            model: "ai/smolvlm:500M-Q8_0",
             messages: [
-                {
-                    role: 'user', content: [
-                        {type: 'text', text: instruction},
-                        {
-                            type: 'image_url', image_url: {
+                { role: 'user', content: [
+                    { type: 'text', text: instruction },
+                    { type: 'image_url', image_url: {
                         url: imageBase64URL,
-                    }
-                }
-            ]
-        },
+                    } }
+                ] },
             ]
         })
     });
     if (!response.ok) {
-        const errorData = await response.text();
-        return `Server error: ${response.status} - ${errorData}`;
+        const errorText = await response.text();
+        try {
+            const errorData = JSON.parse(errorText);
+            // Check if error message indicates no multimodal support
+            if (errorData.error && errorData.error.message &&
+                errorData.error.message.includes('image input is not supported')) {
+                return "Error: The selected model does not support image input. Please select a vision model (e.g., SmolVLM).";
+            }
+            return `Server error: ${response.status} - ${errorData.error?.message || errorText}`;
+        } catch (e) {
+            // If JSON parse fails, use the raw text
+            return `Server error: ${response.status} - ${errorText}`;
+        }
     }
     const data = await response.json();
     return data.choices[0].message.content;
 }

-// 1. Ask for camera permission on load
+// 1. Ask for camera permission and fetch models on load
 async function initCamera() {
     try {
-        stream = await navigator.mediaDevices.getUserMedia({video: true, audio: false});
+        stream = await navigator.mediaDevices.getUserMedia({ video: true, audio: false });
         video.srcObject = stream;
-        responseText.value = "Camera access granted. Ready to start.";
+        responseText.value = "Camera access granted. Loading models...";
+        await fetchModels();
     } catch (err) {
         console.error("Error accessing camera:", err);
         responseText.value = `Error accessing camera: ${err.name} - ${err.message}. Please ensure permissions are granted and you are on HTTPS or localhost.`;
@@ -226,6 +299,7 @@ <h1>Camera Interaction App</h1>

     instructionText.disabled = true;
     intervalSelect.disabled = true;
+    modelSelect.disabled = true;

     responseText.value = "Processing started...";

@@ -250,6 +324,7 @@ <h1>Camera Interaction App</h1>

     instructionText.disabled = false;
     intervalSelect.disabled = false;
+    modelSelect.disabled = false;
     if (responseText.value.startsWith("Processing started...")) {
         responseText.value = "Processing stopped.";
     }
@@ -278,4 +353,4 @@ <h1>Camera Interaction App</h1>

     </script>
 </body>
-</html>
+</html>
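
For context on how `sendChatCompletionRequest(instruction, imageBase64URL)` gets its image argument: the frame-capture step sits outside this diff, so the sketch below is only a plausible illustration using the page's `video` and `canvas` elements. The `captureFrame` helper name and the JPEG quality value are assumptions, not the demo's actual code.

```js
// Hypothetical sketch: capture the current video frame as a base64 data URL
// and send it with the current instruction. Relies on the page's video, canvas,
// instructionText, responseText, and sendChatCompletionRequest defined above.
function captureFrame() {
    canvas.width = video.videoWidth;
    canvas.height = video.videoHeight;
    canvas.getContext('2d').drawImage(video, 0, 0);
    return canvas.toDataURL('image/jpeg', 0.8); // quality value is a guess
}

async function sendOneFrame() {
    const imageBase64URL = captureFrame();
    const reply = await sendChatCompletionRequest(instructionText.value, imageBase64URL);
    responseText.value = reply;
}
```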
