@@ -1130,6 +1130,14 @@ <h3>Image Preview</h3>
11301130 < option value ="8.2.0 "> 8.2.0</ option >
11311131 </ select >
11321132 </ div >
1133+ < div >
1134+ < label for ="annotationModel-image "> Annotation Model</ label >
1135+ < select id ="annotationModel-image ">
1136+ < option value ="anthropic/claude-haiku-4.5 " data-provider ="anthropic " selected > Claude Haiku 4.5 (best)</ option >
1137+ < option value ="mistralai/mistral-small-3.2-24b-instruct " data-provider ="mistral "> Mistral Small 3.2 (balanced, cheap)</ option >
1138+ < option value ="openai/gpt-oss-120b " data-provider ="Cerebras "> GPT-OSS 120B (fast, cheap)</ option >
1139+ </ select >
1140+ </ div >
11331141 < div >
11341142 < label for ="maxAttempts-image "> Max Validation Attempts</ label >
11351143 < input type ="number " id ="maxAttempts-image " value ="3 " min ="1 " max ="10 ">
@@ -1507,6 +1515,11 @@ <h3>Status</h3>
15071515 const maxAttempts = parseInt ( document . getElementById ( 'maxAttempts-image' ) . value ) ;
15081516 const runAssessment = document . getElementById ( 'runAssessment-image' ) . checked ;
15091517
1518+ // Get selected model and provider
1519+ const modelSelect = document . getElementById ( 'annotationModel-image' ) ;
1520+ const selectedModel = modelSelect . value ;
1521+ const selectedProvider = modelSelect . options [ modelSelect . selectedIndex ] . dataset . provider ;
1522+
15101523 // Store for feedback
15111524 lastInputDescription = null ; // No text description for image mode
15121525 lastImageData = uploadedImageBase64 ;
@@ -1516,58 +1529,199 @@ <h3>Status</h3>
15161529 document . getElementById ( 'generateBtn-image' ) . disabled = true ;
15171530 document . getElementById ( 'progressStatus' ) . classList . add ( 'active' ) ;
15181531
1519- updateProgress ( 'Analyzing image...' , [ ] ) ;
1532+ updateProgress ( 'Analyzing image...' , getProgressSteps ( 'starting' ) ) ;
15201533
15211534 try {
1522- // Get Turnstile token for bot protection
1523- const turnstileToken = await getTurnstileToken ( ) ;
1535+ // Try streaming first, fall back to non-streaming
1536+ await generateFromImageStreaming ( visionPrompt , schema , maxAttempts , runAssessment , selectedModel , selectedProvider ) ;
1537+ } catch ( error ) {
1538+ // If streaming fails, try non-streaming fallback
1539+ console . warn ( 'Streaming failed, trying fallback:' , error ) ;
1540+ try {
1541+ await generateFromImageFallback ( visionPrompt , schema , maxAttempts , runAssessment , selectedModel , selectedProvider ) ;
1542+ } catch ( fallbackError ) {
1543+ displayError ( fallbackError . message ) ;
1544+ document . getElementById ( 'progressStatus' ) . classList . remove ( 'active' ) ;
1545+ document . getElementById ( 'generateBtn-image' ) . disabled = false ;
1546+ }
1547+ }
1548+ }
15241549
1525- const payload = {
1526- image : uploadedImageBase64 ,
1527- schema_version : schema ,
1528- max_validation_attempts : maxAttempts ,
1529- run_assessment : runAssessment ,
1530- telemetry_enabled : isTelemetryEnabled ( )
1531- } ;
/**
 * Request an annotation for the uploaded image from the streaming endpoint
 * and dispatch each server-sent event to handleImageStreamEvent as it arrives.
 *
 * On normal completion the progress indicator is hidden and the generate
 * button re-enabled. On failure this function throws WITHOUT resetting the UI,
 * so that a caller can retry through another path while progress stays visible.
 *
 * @param {string} visionPrompt      Optional custom prompt; omitted from the payload when falsy.
 * @param {string} schema            Sent as `schema_version`.
 * @param {number} maxAttempts       Sent as `max_validation_attempts`.
 * @param {boolean} runAssessment    Sent as `run_assessment`.
 * @param {string} selectedModel     Sent as the `X-OpenRouter-Model` header.
 * @param {string} selectedProvider  Sent as the `X-OpenRouter-Provider` header.
 * @throws {Error} On a non-2xx response, a network/stream read failure, or a
 *                 malformed `data:` line received before the stream ended.
 */
async function generateFromImageStreaming(visionPrompt, schema, maxAttempts, runAssessment, selectedModel, selectedProvider) {
    // Get Turnstile token for bot protection
    const turnstileToken = await getTurnstileToken();

    const payload = {
        image: uploadedImageBase64,
        schema_version: schema,
        max_validation_attempts: maxAttempts,
        run_assessment: runAssessment,
        telemetry_enabled: isTelemetryEnabled()
    };

    if (visionPrompt) {
        payload.prompt = visionPrompt;
    }

    if (turnstileToken) {
        payload.cf_turnstile_response = turnstileToken;
    }

    const headers = {
        'Content-Type': 'application/json',
        'X-User-Id': `frontend-${FRONTEND_VERSION}`,
    };
    // Only send routing headers that actually have a value; an undefined
    // header value would otherwise be serialized as the string "undefined".
    if (selectedModel) {
        headers['X-OpenRouter-Model'] = selectedModel;
    }
    if (selectedProvider) {
        headers['X-OpenRouter-Provider'] = selectedProvider;
    }

    const response = await fetch(`${API_URL}/annotate-from-image/stream`, {
        method: 'POST',
        headers,
        body: JSON.stringify(payload)
    });

    if (!response.ok) {
        throw new Error(`HTTP error! status: ${response.status}`);
    }

    // Read the streaming response
    const reader = response.body.getReader();
    const decoder = new TextDecoder();
    let buffer = '';

    // Name of the most recent `event:` line still waiting for its `data:` line.
    // This must live outside the read loop: a network chunk can end between
    // the two lines, and resetting it per chunk would drop that event.
    let pendingEvent = null;

    // Pair `event:` / `data:` lines and dispatch them.
    // tolerant=false: a malformed payload throws (caller may fall back).
    // tolerant=true:  failures are logged and skipped (used for the final flush).
    const dispatchLines = (lines, tolerant) => {
        for (const rawLine of lines) {
            // Accept CRLF-terminated streams: strip the trailing '\r' so the
            // event name matches the handler's switch cases.
            const line = rawLine.endsWith('\r') ? rawLine.slice(0, -1) : rawLine;

            if (line.startsWith('event: ')) {
                pendingEvent = line.substring(7);
            } else if (line.startsWith('data: ') && pendingEvent) {
                const eventType = pendingEvent;
                pendingEvent = null;
                if (tolerant) {
                    try {
                        handleImageStreamEvent(eventType, JSON.parse(line.substring(6)));
                    } catch (e) {
                        console.warn('Failed to parse SSE data:', line);
                    }
                } else {
                    handleImageStreamEvent(eventType, JSON.parse(line.substring(6)));
                }
            }
        }
    };

    while (true) {
        const { done, value } = await reader.read();
        if (done) break;

        buffer += decoder.decode(value, { stream: true });

        // Everything before the last '\n' is complete; keep the remainder
        // (a possibly incomplete line) in the buffer for the next chunk.
        const lines = buffer.split('\n');
        buffer = lines.pop() || '';

        dispatchLines(lines, false);
    }

    // Flush decoder and process any remaining data (Safari compatibility)
    buffer += decoder.decode(); // Flush remaining bytes
    if (buffer.trim()) {
        dispatchLines(buffer.split('\n'), true);
    }

    document.getElementById('progressStatus').classList.remove('active');
    document.getElementById('generateBtn-image').disabled = false;
}
15501635
1551- if ( ! response . ok ) {
1552- const errorData = await response . json ( ) . catch ( ( ) => ( { } ) ) ;
1553- throw new Error ( errorData . detail || `Server error: ${ response . status } ` ) ;
1554- }
/**
 * Apply one parsed server-sent event from the image annotation stream to the UI.
 *
 * Handled event types: 'progress', 'image_description', 'validation',
 * 'result', 'error' and 'done'. Unknown types are ignored.
 *
 * Side effects: updates the progress display, may write the shared
 * `lastInputDescription` / `lastResultData` variables, renders results, and on
 * 'error' shows the message and re-enables the generate button.
 *
 * @param {string} eventType  Event name taken from the `event:` line.
 * @param {Object} data       JSON payload taken from the matching `data:` line.
 */
function handleImageStreamEvent(eventType, data) {
    switch (eventType) {
        case 'progress': {
            // Braces give the declaration its own scope instead of leaking it
            // into every other case of the switch.
            // The image pipeline reports 'vision' as its first stage; show it
            // as the generic 'starting' step.
            const stage = data.stage === 'vision' ? 'starting' : data.stage;
            updateProgress(data.message, getProgressSteps(stage));
            break;
        }
        case 'image_description':
            // Store the image description for later display
            lastInputDescription = data.description;
            updateProgress('Image analyzed, generating annotation...', getProgressSteps('annotating'));
            break;
        case 'validation':
            if (data.valid) {
                updateProgress('Validation passed!', getProgressSteps('evaluating'));
            } else {
                const attemptMsg = `Attempt ${data.attempt}: ${data.message}`;
                updateProgress(attemptMsg, getProgressSteps('validating'));
            }
            break;
        case 'result':
            if (!data.image_description) {
                // Add image_description to result if we captured it earlier
                if (lastInputDescription) {
                    data.image_description = lastInputDescription;
                }
            } else if (!lastInputDescription) {
                // No separate image_description event was seen: take the
                // description from the result so the stored value matches
                // what the non-streaming path records.
                lastInputDescription = data.image_description;
            }
            lastResultData = data;
            displayImageAnnotationResults(data);
            break;
        case 'error':
            displayError(data.message);
            document.getElementById('progressStatus').classList.remove('active');
            document.getElementById('generateBtn-image').disabled = false;
            break;
        case 'done':
            // Streaming complete
            break;
    }
}
1675+
/**
 * Request an annotation for the uploaded image from the non-streaming
 * endpoint and render the complete result in one step.
 *
 * The progress indicator is hidden and the generate button re-enabled whether
 * the request succeeds or fails; failures are still rethrown to the caller.
 *
 * @param {string} visionPrompt      Optional custom prompt; omitted from the payload when falsy.
 * @param {string} schema            Sent as `schema_version`.
 * @param {number} maxAttempts       Sent as `max_validation_attempts`.
 * @param {boolean} runAssessment    Sent as `run_assessment`.
 * @param {string} selectedModel     Sent as the `X-OpenRouter-Model` header.
 * @param {string} selectedProvider  Sent as the `X-OpenRouter-Provider` header.
 * @throws {Error} On a non-2xx response (message taken from the body's
 *                 `detail` field when present) or a network/JSON failure.
 */
async function generateFromImageFallback(visionPrompt, schema, maxAttempts, runAssessment, selectedModel, selectedProvider) {
    updateProgress('Analyzing image...', getProgressSteps('starting'));

    try {
        // Get Turnstile token for bot protection
        const turnstileToken = await getTurnstileToken();

        const payload = {
            image: uploadedImageBase64,
            schema_version: schema,
            max_validation_attempts: maxAttempts,
            run_assessment: runAssessment,
            telemetry_enabled: isTelemetryEnabled()
        };

        if (visionPrompt) {
            payload.prompt = visionPrompt;
        }

        if (turnstileToken) {
            payload.cf_turnstile_response = turnstileToken;
        }

        const headers = {
            'Content-Type': 'application/json',
            'X-User-Id': `frontend-${FRONTEND_VERSION}`,
        };
        // Only send routing headers that actually have a value; an undefined
        // header value would otherwise be serialized as the string "undefined".
        if (selectedModel) {
            headers['X-OpenRouter-Model'] = selectedModel;
        }
        if (selectedProvider) {
            headers['X-OpenRouter-Provider'] = selectedProvider;
        }

        const response = await fetch(`${API_URL}/annotate-from-image`, {
            method: 'POST',
            headers,
            body: JSON.stringify(payload)
        });

        if (!response.ok) {
            // The error body may not be JSON at all; fall back to an empty object.
            const errorData = await response.json().catch(() => ({}));
            const detail = errorData && errorData.detail;
            let message = `Server error: ${response.status}`;
            if (typeof detail === 'string' && detail) {
                message = detail;
            } else if (detail) {
                // A structured detail (array/object) would otherwise be
                // rendered as "[object Object]".
                message = JSON.stringify(detail);
            }
            throw new Error(message);
        }

        const result = await response.json();

        // Store for feedback
        lastResultData = result;
        lastInputDescription = result.image_description;

        // Display results
        displayImageAnnotationResults(result);
    } finally {
        // Always release the UI, even when the request or rendering throws.
        document.getElementById('progressStatus').classList.remove('active');
        document.getElementById('generateBtn-image').disabled = false;
    }
}
15721726
15731727 function displayImageAnnotationResults ( result ) {
@@ -1697,16 +1851,21 @@ <h3>Generated Image Description</h3>
16971851 }
16981852 }
16991853
1700- // Process any remaining data
1854+ // Flush decoder and process any remaining data (Safari compatibility)
1855+ buffer += decoder . decode ( ) ; // Flush remaining bytes
17011856 if ( buffer . trim ( ) ) {
17021857 const lines = buffer . split ( '\n' ) ;
17031858 let currentEvent = null ;
17041859 for ( const line of lines ) {
17051860 if ( line . startsWith ( 'event: ' ) ) {
17061861 currentEvent = line . substring ( 7 ) ;
17071862 } else if ( line . startsWith ( 'data: ' ) && currentEvent ) {
1708- const data = JSON . parse ( line . substring ( 6 ) ) ;
1709- handleStreamEvent ( currentEvent , data ) ;
1863+ try {
1864+ const data = JSON . parse ( line . substring ( 6 ) ) ;
1865+ handleStreamEvent ( currentEvent , data ) ;
1866+ } catch ( e ) {
1867+ console . warn ( 'Failed to parse SSE data:' , line ) ;
1868+ }
17101869 currentEvent = null ;
17111870 }
17121871 }
0 commit comments