Annotation-Garden
diff --git a/frontend/index.html b/frontend/index.html
Lines changed: 200 additions & 41 deletions
diff --git a/pyproject.toml b/pyproject.toml
Lines changed: 1 addition & 1 deletion
@@ -1130,6 +1130,14 @@ <h3>Image Preview</h3>
                         <option value="8.2.0">8.2.0</option>
                     </select>
                 </div>
+                <!-- Model picker for image annotation. The selected option's value and its
+                     data-provider attribute are read by the image generation script and sent
+                     as the X-OpenRouter-Model and X-OpenRouter-Provider request headers.
+                     NOTE(review): "Cerebras" is capitalized while the other providers are
+                     lowercase; confirm provider names are matched case-insensitively. -->
+                <div>
+                    <label for="annotationModel-image">Annotation Model</label>
+                    <select id="annotationModel-image">
+                        <option value="anthropic/claude-haiku-4.5" data-provider="anthropic" selected>Claude Haiku 4.5 (best)</option>
+                        <option value="mistralai/mistral-small-3.2-24b-instruct" data-provider="mistral">Mistral Small 3.2 (balanced, cheap)</option>
+                        <option value="openai/gpt-oss-120b" data-provider="Cerebras">GPT-OSS 120B (fast, cheap)</option>
+                    </select>
+                </div>
                 <div>
                     <label for="maxAttempts-image">Max Validation Attempts</label>
                     <input type="number" id="maxAttempts-image" value="3" min="1" max="10">
@@ -1507,6 +1515,11 @@ <h3>Status</h3>
             const maxAttempts = parseInt(document.getElementById('maxAttempts-image').value);
             const runAssessment = document.getElementById('runAssessment-image').checked;
 
+            // Get selected model and provider
+            const modelSelect = document.getElementById('annotationModel-image');
+            const selectedModel = modelSelect.value;
+            const selectedProvider = modelSelect.options[modelSelect.selectedIndex].dataset.provider;
+
             // Store for feedback
             lastInputDescription = null; // No text description for image mode
             lastImageData = uploadedImageBase64;
@@ -1516,58 +1529,199 @@ <h3>Status</h3>
             document.getElementById('generateBtn-image').disabled = true;
             document.getElementById('progressStatus').classList.add('active');
 
-            updateProgress('Analyzing image...', []);
+            updateProgress('Analyzing image...', getProgressSteps('starting'));
 
             try {
-                // Get Turnstile token for bot protection
-                const turnstileToken = await getTurnstileToken();
+                // Try streaming first, fall back to non-streaming
+                await generateFromImageStreaming(visionPrompt, schema, maxAttempts, runAssessment, selectedModel, selectedProvider);
+            } catch (error) {
+                // If streaming fails, try non-streaming fallback
+                console.warn('Streaming failed, trying fallback:', error);
+                try {
+                    await generateFromImageFallback(visionPrompt, schema, maxAttempts, runAssessment, selectedModel, selectedProvider);
+                } catch (fallbackError) {
+                    displayError(fallbackError.message);
+                    document.getElementById('progressStatus').classList.remove('active');
+                    document.getElementById('generateBtn-image').disabled = false;
+                }
+            }
+        }
 
-                const payload = {
-                    image: uploadedImageBase64,
-                    schema_version: schema,
-                    max_validation_attempts: maxAttempts,
-                    run_assessment: runAssessment,
-                    telemetry_enabled: isTelemetryEnabled()
-                };
+        /**
+         * Request an annotation for the uploaded image from the streaming endpoint
+         * and pass each server-sent event to handleImageStreamEvent as it arrives.
+         *
+         * @param visionPrompt - Optional prompt; sent as `prompt` only when truthy.
+         * @param schema - Sent as `schema_version`.
+         * @param {number} maxAttempts - Sent as `max_validation_attempts`.
+         * @param {boolean} runAssessment - Sent as `run_assessment`.
+         * @param {string} selectedModel - Sent in the `X-OpenRouter-Model` header.
+         * @param {string} selectedProvider - Sent in the `X-OpenRouter-Provider` header.
+         * @returns {Promise<void>} Resolves once the stream has been fully consumed.
+         * @throws {Error} On a non-OK HTTP status, or when a data line read inside the
+         *     main loop is not valid JSON. The progress indicator and the generate
+         *     button are not reset when this throws; that is left to the caller.
+         */
+        async function generateFromImageStreaming(visionPrompt, schema, maxAttempts, runAssessment, selectedModel, selectedProvider) {
+            // Get Turnstile token for bot protection
+            const turnstileToken = await getTurnstileToken();
+
+            const payload = {
+                image: uploadedImageBase64,
+                schema_version: schema,
+                max_validation_attempts: maxAttempts,
+                run_assessment: runAssessment,
+                telemetry_enabled: isTelemetryEnabled()
+            };
+
+            if (visionPrompt) {
+                payload.prompt = visionPrompt;
+            }
+
+            if (turnstileToken) {
+                payload.cf_turnstile_response = turnstileToken;
+            }
+
+            const response = await fetch(`${API_URL}/annotate-from-image/stream`, {
+                method: 'POST',
+                headers: {
+                    'Content-Type': 'application/json',
+                    'X-User-Id': `frontend-${FRONTEND_VERSION}`,
+                    'X-OpenRouter-Model': selectedModel,
+                    'X-OpenRouter-Provider': selectedProvider,
+                },
+                body: JSON.stringify(payload)
+            });
+
+            if (!response.ok) {
+                throw new Error(`HTTP error! status: ${response.status}`);
+            }
 
-                if (visionPrompt) {
-                    payload.prompt = visionPrompt;
+            // Read the streaming response
+            const reader = response.body.getReader();
+            const decoder = new TextDecoder();
+            let buffer = '';
+            // Kept outside the read loop: an `event:` line and its `data:` line can
+            // arrive in different chunks, so the pending event name has to survive
+            // from one read() to the next or the payload would be dropped.
+            let currentEvent = null;
+
+            while (true) {
+                const { done, value } = await reader.read();
+                if (done) break;
+
+                buffer += decoder.decode(value, { stream: true });
+
+                // Parse SSE events from buffer
+                const lines = buffer.split('\n');
+                buffer = lines.pop() || ''; // Keep incomplete line in buffer
+
+                for (const line of lines) {
+                    if (line.startsWith('event: ')) {
+                        // trim() drops a trailing '\r' when the stream uses CRLF line endings
+                        currentEvent = line.substring(7).trim();
+                    } else if (line.startsWith('data: ') && currentEvent) {
+                        // A parse failure here propagates on purpose so the caller can
+                        // retry through the non-streaming endpoint
+                        const data = JSON.parse(line.substring(6));
+                        handleImageStreamEvent(currentEvent, data);
+                        currentEvent = null;
+                    }
                 }
+            }
 
-                // Include Turnstile token if available
-                if (turnstileToken) {
-                    payload.cf_turnstile_response = turnstileToken;
+            // Flush decoder and process any remaining data (Safari compatibility)
+            buffer += decoder.decode(); // Flush remaining bytes
+            if (buffer.trim()) {
+                const lines = buffer.split('\n');
+                // currentEvent is not reset here: the trailing data line may belong
+                // to an event name that was read from the last chunk
+                for (const line of lines) {
+                    if (line.startsWith('event: ')) {
+                        currentEvent = line.substring(7).trim();
+                    } else if (line.startsWith('data: ') && currentEvent) {
+                        try {
+                            const data = JSON.parse(line.substring(6));
+                            handleImageStreamEvent(currentEvent, data);
+                        } catch (e) {
+                            console.warn('Failed to parse SSE data:', line);
+                        }
+                        currentEvent = null;
+                    }
                 }
+            }
 
-                const response = await fetch(`${API_URL}/annotate-from-image`, {
-                    method: 'POST',
-                    headers: {
-                        'Content-Type': 'application/json',
-                        'X-User-Id': `frontend-${FRONTEND_VERSION}`,
-                    },
-                    body: JSON.stringify(payload)
-                });
+            document.getElementById('progressStatus').classList.remove('active');
+            document.getElementById('generateBtn-image').disabled = false;
+        }
 
-                if (!response.ok) {
-                    const errorData = await response.json().catch(() => ({}));
-                    throw new Error(errorData.detail || `Server error: ${response.status}`);
-                }
+        /**
+         * Apply one parsed event from the image annotation stream to the page.
+         *
+         * Handled event types: 'progress', 'image_description', 'validation',
+         * 'result' and 'error'. 'done' and any other type are ignored.
+         *
+         * @param {string} eventType - Event name taken from the stream's `event:` line.
+         * @param {Object} data - Payload parsed from the matching `data:` line.
+         */
+        function handleImageStreamEvent(eventType, data) {
+            if (eventType === 'progress') {
+                // The image-specific 'vision' stage is displayed as the 'starting' step
+                const reportedStage = data.stage;
+                const shownStage = reportedStage === 'vision' ? 'starting' : reportedStage;
+                updateProgress(data.message, getProgressSteps(shownStage));
+                return;
+            }
+
+            if (eventType === 'image_description') {
+                // Remember the description so it can be attached to the result later
+                lastInputDescription = data.description;
+                updateProgress('Image analyzed, generating annotation...', getProgressSteps('annotating'));
+                return;
+            }
+
+            if (eventType === 'validation') {
+                if (!data.valid) {
+                    updateProgress(`Attempt ${data.attempt}: ${data.message}`, getProgressSteps('validating'));
+                    return;
+                }
+                updateProgress('Validation passed!', getProgressSteps('evaluating'));
+                return;
+            }
+
+            if (eventType === 'result') {
+                // Fill in the captured description only when the result has none of its own
+                const needsDescription = lastInputDescription && !data.image_description;
+                if (needsDescription) {
+                    data.image_description = lastInputDescription;
+                }
+                lastResultData = data;
+                displayImageAnnotationResults(data);
+                return;
+            }
+
+            if (eventType === 'error') {
+                displayError(data.message);
+                const progressEl = document.getElementById('progressStatus');
+                progressEl.classList.remove('active');
+                const generateButton = document.getElementById('generateBtn-image');
+                generateButton.disabled = false;
+            }
+
+            // 'done' (streaming complete) and unrecognized event types need no action
+        }
+
+        /**
+         * Non-streaming image annotation: POST the uploaded image to
+         * `${API_URL}/annotate-from-image`, wait for the complete JSON result, then
+         * store and display it.
+         *
+         * @param visionPrompt - Optional prompt; sent as `prompt` only when truthy.
+         * @param schema - Sent as `schema_version`.
+         * @param maxAttempts - Sent as `max_validation_attempts`.
+         * @param runAssessment - Sent as `run_assessment`.
+         * @param selectedModel - Sent in the `X-OpenRouter-Model` header.
+         * @param selectedProvider - Sent in the `X-OpenRouter-Provider` header.
+         * @throws {Error} On a non-OK response, using the body's `detail` field when
+         *     present. The progress indicator and the generate button are reset only
+         *     on success; when this throws, that is left to the caller.
+         */
+        async function generateFromImageFallback(visionPrompt, schema, maxAttempts, runAssessment, selectedModel, selectedProvider) {
+            updateProgress('Analyzing image...', getProgressSteps('starting'));
+
+            // Get Turnstile token for bot protection
+            const turnstileToken = await getTurnstileToken();
 
-                const result = await response.json();
+            const payload = {
+                image: uploadedImageBase64,
+                schema_version: schema,
+                max_validation_attempts: maxAttempts,
+                run_assessment: runAssessment,
+                telemetry_enabled: isTelemetryEnabled()
+            };
 
-                // Store for feedback
-                lastResultData = result;
-                lastInputDescription = result.image_description; // Use generated description
+            if (visionPrompt) {
+                payload.prompt = visionPrompt;
+            }
 
-                // Display results
-                displayImageAnnotationResults(result);
+            if (turnstileToken) {
+                payload.cf_turnstile_response = turnstileToken;
+            }
 
-            } catch (error) {
-                displayError(error.message);
-            } finally {
-                document.getElementById('progressStatus').classList.remove('active');
-                document.getElementById('generateBtn-image').disabled = false;
+            const response = await fetch(`${API_URL}/annotate-from-image`, {
+                method: 'POST',
+                headers: {
+                    'Content-Type': 'application/json',
+                    'X-User-Id': `frontend-${FRONTEND_VERSION}`,
+                    'X-OpenRouter-Model': selectedModel,
+                    'X-OpenRouter-Provider': selectedProvider,
+                },
+                body: JSON.stringify(payload)
+            });
+
+            if (!response.ok) {
+                // The error body may not be JSON; fall back to an empty object so the
+                // status-based message below is used instead
+                const errorData = await response.json().catch(() => ({}));
+                throw new Error(errorData.detail || `Server error: ${response.status}`);
             }
+
+            const result = await response.json();
+
+            // Store for feedback
+            lastResultData = result;
+            lastInputDescription = result.image_description;
+
+            // Display results
+            displayImageAnnotationResults(result);
+
+            // Success path only: hide the progress indicator and re-enable the button
+            document.getElementById('progressStatus').classList.remove('active');
+            document.getElementById('generateBtn-image').disabled = false;
         }
 
         function displayImageAnnotationResults(result) {
@@ -1697,16 +1851,21 @@ <h3>Generated Image Description</h3>
                 }
             }
 
-            // Process any remaining data
+            // Flush decoder and process any remaining data (Safari compatibility)
+            buffer += decoder.decode(); // Flush remaining bytes
             if (buffer.trim()) {
                 const lines = buffer.split('\n');
                 let currentEvent = null;
                 for (const line of lines) {
                     if (line.startsWith('event: ')) {
                         currentEvent = line.substring(7);
                     } else if (line.startsWith('data: ') && currentEvent) {
-                        const data = JSON.parse(line.substring(6));
-                        handleStreamEvent(currentEvent, data);
+                        try {
+                            const data = JSON.parse(line.substring(6));
+                            handleStreamEvent(currentEvent, data);
+                        } catch (e) {
+                            console.warn('Failed to parse SSE data:', line);
+                        }
                         currentEvent = null;
                     }
                 }
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "hedit"
-version = "0.6.7a3"
+version = "0.6.8.dev1"
 description = "Multi-agent system for HED annotation generation and validation"
 readme = "PKG_README.md"
 requires-python = ">=3.12"