Skip to content

Commit 180550b

Browse files
Merge pull request #86 from Annotation-Garden/develop
Add streaming support for image annotation
2 parents 50078b9 + 1fe442d commit 180550b

File tree

9 files changed

+772
-75
lines changed

9 files changed

+772
-75
lines changed

frontend/index.html

Lines changed: 200 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1130,6 +1130,14 @@ <h3>Image Preview</h3>
11301130
<option value="8.2.0">8.2.0</option>
11311131
</select>
11321132
</div>
1133+
<div>
1134+
<label for="annotationModel-image">Annotation Model</label>
1135+
<select id="annotationModel-image">
1136+
<option value="anthropic/claude-haiku-4.5" data-provider="anthropic" selected>Claude Haiku 4.5 (best)</option>
1137+
<option value="mistralai/mistral-small-3.2-24b-instruct" data-provider="mistral">Mistral Small 3.2 (balanced, cheap)</option>
1138+
<option value="openai/gpt-oss-120b" data-provider="Cerebras">GPT-OSS 120B (fast, cheap)</option>
1139+
</select>
1140+
</div>
11331141
<div>
11341142
<label for="maxAttempts-image">Max Validation Attempts</label>
11351143
<input type="number" id="maxAttempts-image" value="3" min="1" max="10">
@@ -1507,6 +1515,11 @@ <h3>Status</h3>
15071515
const maxAttempts = parseInt(document.getElementById('maxAttempts-image').value);
15081516
const runAssessment = document.getElementById('runAssessment-image').checked;
15091517

1518+
// Get selected model and provider
1519+
const modelSelect = document.getElementById('annotationModel-image');
1520+
const selectedModel = modelSelect.value;
1521+
const selectedProvider = modelSelect.options[modelSelect.selectedIndex].dataset.provider;
1522+
15101523
// Store for feedback
15111524
lastInputDescription = null; // No text description for image mode
15121525
lastImageData = uploadedImageBase64;
@@ -1516,58 +1529,199 @@ <h3>Status</h3>
15161529
document.getElementById('generateBtn-image').disabled = true;
15171530
document.getElementById('progressStatus').classList.add('active');
15181531

1519-
updateProgress('Analyzing image...', []);
1532+
updateProgress('Analyzing image...', getProgressSteps('starting'));
15201533

15211534
try {
1522-
// Get Turnstile token for bot protection
1523-
const turnstileToken = await getTurnstileToken();
1535+
// Try streaming first, fall back to non-streaming
1536+
await generateFromImageStreaming(visionPrompt, schema, maxAttempts, runAssessment, selectedModel, selectedProvider);
1537+
} catch (error) {
1538+
// If streaming fails, try non-streaming fallback
1539+
console.warn('Streaming failed, trying fallback:', error);
1540+
try {
1541+
await generateFromImageFallback(visionPrompt, schema, maxAttempts, runAssessment, selectedModel, selectedProvider);
1542+
} catch (fallbackError) {
1543+
displayError(fallbackError.message);
1544+
document.getElementById('progressStatus').classList.remove('active');
1545+
document.getElementById('generateBtn-image').disabled = false;
1546+
}
1547+
}
1548+
}
15241549

1525-
const payload = {
1526-
image: uploadedImageBase64,
1527-
schema_version: schema,
1528-
max_validation_attempts: maxAttempts,
1529-
run_assessment: runAssessment,
1530-
telemetry_enabled: isTelemetryEnabled()
1531-
};
1550+
/**
 * Streaming variant of image annotation: POSTs the uploaded image to the
 * /annotate-from-image/stream endpoint under API_URL, reads the response body
 * incrementally, and forwards each parsed event/data pair to
 * handleImageStreamEvent.
 *
 * Reading note: this listing is a rendered diff. A gutter line ending in a plus
 * sign marks the next line as added, one ending in a minus sign marks the next
 * line as removed (old implementation, NOT part of this function), and a gutter
 * line with two numbers marks unchanged context.
 *
 * @param visionPrompt     sent as payload.prompt only when truthy
 * @param schema           sent as payload.schema_version
 * @param maxAttempts      sent as payload.max_validation_attempts
 * @param runAssessment    sent as payload.run_assessment
 * @param selectedModel    sent in the X-OpenRouter-Model request header
 * @param selectedProvider sent in the X-OpenRouter-Provider request header
 * @returns a promise that resolves (with no value) after the stream has ended
 *          and the progress indicator / generate button have been reset
 * @throws Error when response.ok is false; also propagates any exception raised
 *         by JSON.parse or handleImageStreamEvent inside the main read loop
 */
async function generateFromImageStreaming(visionPrompt, schema, maxAttempts, runAssessment, selectedModel, selectedProvider) {
1551+
// Get Turnstile token for bot protection
1552+
const turnstileToken = await getTurnstileToken();
1553+
1554+
const payload = {
1555+
image: uploadedImageBase64,
1556+
schema_version: schema,
1557+
max_validation_attempts: maxAttempts,
1558+
run_assessment: runAssessment,
1559+
telemetry_enabled: isTelemetryEnabled()
1560+
};
1561+
1562+
if (visionPrompt) {
1563+
payload.prompt = visionPrompt;
1564+
}
1565+
1566+
if (turnstileToken) {
1567+
payload.cf_turnstile_response = turnstileToken;
1568+
}
1569+
1570+
const response = await fetch(`${API_URL}/annotate-from-image/stream`, {
1571+
method: 'POST',
1572+
headers: {
1573+
'Content-Type': 'application/json',
1574+
'X-User-Id': `frontend-${FRONTEND_VERSION}`,
1575+
'X-OpenRouter-Model': selectedModel,
1576+
'X-OpenRouter-Provider': selectedProvider,
1577+
},
1578+
body: JSON.stringify(payload)
1579+
});
1580+
1581+
if (!response.ok) {
1582+
throw new Error(`HTTP error! status: ${response.status}`);
1583+
}
15321584

1533-
if (visionPrompt) {
1534-
payload.prompt = visionPrompt;
1585+
// Read the streaming response
1586+
const reader = response.body.getReader();
1587+
const decoder = new TextDecoder();
1588+
let buffer = '';
1589+
1590+
while (true) {
1591+
const { done, value } = await reader.read();
1592+
if (done) break;
1593+
1594+
buffer += decoder.decode(value, { stream: true });
1595+
1596+
// Parse SSE events from buffer
1597+
const lines = buffer.split('\n');
1598+
buffer = lines.pop() || ''; // Keep incomplete line in buffer
1599+
1600+
// NOTE(review): currentEvent is re-initialised for every chunk read, so an
// event line that ends one chunk and the data line that begins the next are
// never paired, and that data line is skipped. Hoisting this declaration above
// the while loop would carry the pending event name across reads.
let currentEvent = null;
1601+
for (const line of lines) {
1602+
if (line.startsWith('event: ')) {
1603+
currentEvent = line.substring(7);
1604+
} else if (line.startsWith('data: ') && currentEvent) {
1605+
// NOTE(review): unlike the post-loop section below, this parse is not wrapped
// in try/catch, so a malformed data line rejects the whole function.
const data = JSON.parse(line.substring(6));
1606+
handleImageStreamEvent(currentEvent, data);
1607+
currentEvent = null;
1608+
}
15351609
}
1610+
}
15361611

1537-
// Include Turnstile token if available
1538-
if (turnstileToken) {
1539-
payload.cf_turnstile_response = turnstileToken;
1612+
// Flush decoder and process any remaining data (Safari compatibility)
1613+
buffer += decoder.decode(); // Flush remaining bytes
1614+
if (buffer.trim()) {
1615+
const lines = buffer.split('\n');
1616+
let currentEvent = null;
1617+
for (const line of lines) {
1618+
if (line.startsWith('event: ')) {
1619+
currentEvent = line.substring(7);
1620+
} else if (line.startsWith('data: ') && currentEvent) {
1621+
// The handler call sits inside the try as well, so an exception thrown by
// handleImageStreamEvent is also caught here and logged as a parse failure.
try {
1622+
const data = JSON.parse(line.substring(6));
1623+
handleImageStreamEvent(currentEvent, data);
1624+
} catch (e) {
1625+
console.warn('Failed to parse SSE data:', line);
1626+
}
1627+
currentEvent = null;
1628+
}
15401629
}
1630+
}
15411631

1542-
const response = await fetch(`${API_URL}/annotate-from-image`, {
1543-
method: 'POST',
1544-
headers: {
1545-
'Content-Type': 'application/json',
1546-
'X-User-Id': `frontend-${FRONTEND_VERSION}`,
1547-
},
1548-
body: JSON.stringify(payload)
1549-
});
1632+
// Reset the UI once the stream has ended. This is reached only when nothing
// above threw; there is no finally block in this function.
document.getElementById('progressStatus').classList.remove('active');
1633+
document.getElementById('generateBtn-image').disabled = false;
1634+
}
15501635

1551-
if (!response.ok) {
1552-
const errorData = await response.json().catch(() => ({}));
1553-
throw new Error(errorData.detail || `Server error: ${response.status}`);
1554-
}
1636+
/**
 * Applies one parsed stream event to the page state and progress display.
 *
 * Handled event types: progress, image_description, validation, result,
 * error and done. There is no default branch, so any other type is ignored.
 *
 * Writes the outer variables lastInputDescription (on image_description) and
 * lastResultData (on result); both are declared outside this block.
 *
 * @param eventType event name taken from the event line of the stream
 * @param data      parsed JSON object from the matching data line; the fields
 *                  read here are stage, message, description, valid, attempt
 *                  and image_description, depending on the event type
 */
function handleImageStreamEvent(eventType, data) {
1637+
switch (eventType) {
1638+
case 'progress':
1639+
// Map image-specific stages
1640+
// (declared directly in the case clause, so this binding is scoped to the
// whole switch body rather than to this case alone)
let stage = data.stage;
1641+
if (stage === 'vision') stage = 'starting'; // Vision is first step
1642+
updateProgress(data.message, getProgressSteps(stage));
1643+
break;
1644+
case 'image_description':
1645+
// Store the image description for later display
1646+
lastInputDescription = data.description;
1647+
updateProgress('Image analyzed, generating annotation...', getProgressSteps('annotating'));
1648+
break;
1649+
case 'validation':
1650+
// A passing validation advances the display to the evaluating step; a
// failing one stays on validating and shows the attempt number and message.
if (data.valid) {
1651+
updateProgress('Validation passed!', getProgressSteps('evaluating'));
1652+
} else {
1653+
const attemptMsg = `Attempt ${data.attempt}: ${data.message}`;
1654+
updateProgress(attemptMsg, getProgressSteps('validating'));
1655+
}
1656+
break;
1657+
case 'result':
1658+
// Add image_description to result if we captured it
1659+
if (lastInputDescription && !data.image_description) {
1660+
data.image_description = lastInputDescription;
1661+
}
1662+
lastResultData = data;
1663+
displayImageAnnotationResults(data);
1664+
break;
1665+
case 'error':
1666+
// Shows the error and resets the UI; it does not throw or otherwise signal
// the caller, so whoever is reading the stream keeps going after this event.
displayError(data.message);
1667+
document.getElementById('progressStatus').classList.remove('active');
1668+
document.getElementById('generateBtn-image').disabled = false;
1669+
break;
1670+
case 'done':
1671+
// Streaming complete
1672+
break;
1673+
}
1674+
}
1675+
1676+
/**
 * Non-streaming variant of image annotation: POSTs the uploaded image to the
 * /annotate-from-image endpoint under API_URL, waits for the complete JSON
 * response, stores it in lastResultData / lastInputDescription, and renders it
 * with displayImageAnnotationResults.
 *
 * Reading note: this listing is a rendered diff. A gutter line ending in a plus
 * sign marks the next line as added, one ending in a minus sign marks the next
 * line as removed (old implementation, NOT part of this function), and a gutter
 * line with two numbers marks unchanged context. The old try/catch/finally
 * appears here only as removed lines.
 *
 * @param visionPrompt     sent as payload.prompt only when truthy
 * @param schema           sent as payload.schema_version
 * @param maxAttempts      sent as payload.max_validation_attempts
 * @param runAssessment    sent as payload.run_assessment
 * @param selectedModel    sent in the X-OpenRouter-Model request header
 * @param selectedProvider sent in the X-OpenRouter-Provider request header
 * @returns a promise that resolves (with no value) after results are displayed
 *          and the progress indicator / generate button have been reset
 * @throws Error carrying errorData.detail, or a Server error message with the
 *         HTTP status, when response.ok is false
 */
async function generateFromImageFallback(visionPrompt, schema, maxAttempts, runAssessment, selectedModel, selectedProvider) {
1677+
updateProgress('Analyzing image...', getProgressSteps('starting'));
1678+
1679+
// Get Turnstile token for bot protection
1680+
const turnstileToken = await getTurnstileToken();
15551681

1556-
const result = await response.json();
1682+
const payload = {
1683+
image: uploadedImageBase64,
1684+
schema_version: schema,
1685+
max_validation_attempts: maxAttempts,
1686+
run_assessment: runAssessment,
1687+
telemetry_enabled: isTelemetryEnabled()
1688+
};
15571689

1558-
// Store for feedback
1559-
lastResultData = result;
1560-
lastInputDescription = result.image_description; // Use generated description
1690+
if (visionPrompt) {
1691+
payload.prompt = visionPrompt;
1692+
}
15611693

1562-
// Display results
1563-
displayImageAnnotationResults(result);
1694+
if (turnstileToken) {
1695+
payload.cf_turnstile_response = turnstileToken;
1696+
}
15641697

1565-
} catch (error) {
1566-
displayError(error.message);
1567-
} finally {
1568-
document.getElementById('progressStatus').classList.remove('active');
1569-
document.getElementById('generateBtn-image').disabled = false;
1698+
const response = await fetch(`${API_URL}/annotate-from-image`, {
1699+
method: 'POST',
1700+
headers: {
1701+
'Content-Type': 'application/json',
1702+
'X-User-Id': `frontend-${FRONTEND_VERSION}`,
1703+
'X-OpenRouter-Model': selectedModel,
1704+
'X-OpenRouter-Provider': selectedProvider,
1705+
},
1706+
body: JSON.stringify(payload)
1707+
});
1708+
1709+
if (!response.ok) {
1710+
// If the error body is not valid JSON, fall back to an empty object so the
// generic status message below is used instead.
const errorData = await response.json().catch(() => ({}));
1711+
throw new Error(errorData.detail || `Server error: ${response.status}`);
15701712
}
1713+
1714+
const result = await response.json();
1715+
1716+
// Store for feedback
1717+
lastResultData = result;
1718+
lastInputDescription = result.image_description;
1719+
1720+
// Display results
1721+
displayImageAnnotationResults(result);
1722+
1723+
// NOTE(review): this reset runs only on the success path. With the finally
// block removed, any throw above leaves the reset to the caller.
document.getElementById('progressStatus').classList.remove('active');
1724+
document.getElementById('generateBtn-image').disabled = false;
15711725
}
15721726

15731727
function displayImageAnnotationResults(result) {
@@ -1697,16 +1851,21 @@ <h3>Generated Image Description</h3>
16971851
}
16981852
}
16991853

1700-
// Process any remaining data
1854+
// Flush decoder and process any remaining data (Safari compatibility)
1855+
buffer += decoder.decode(); // Flush remaining bytes
17011856
if (buffer.trim()) {
17021857
const lines = buffer.split('\n');
17031858
let currentEvent = null;
17041859
for (const line of lines) {
17051860
if (line.startsWith('event: ')) {
17061861
currentEvent = line.substring(7);
17071862
} else if (line.startsWith('data: ') && currentEvent) {
1708-
const data = JSON.parse(line.substring(6));
1709-
handleStreamEvent(currentEvent, data);
1863+
try {
1864+
const data = JSON.parse(line.substring(6));
1865+
handleStreamEvent(currentEvent, data);
1866+
} catch (e) {
1867+
console.warn('Failed to parse SSE data:', line);
1868+
}
17101869
currentEvent = null;
17111870
}
17121871
}

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "hedit"
7-
version = "0.6.7a3"
7+
version = "0.6.8.dev1"
88
description = "Multi-agent system for HED annotation generation and validation"
99
readme = "PKG_README.md"
1010
requires-python = ">=3.12"

0 commit comments

Comments
 (0)