Skip to content

Commit ccffbbc

Browse files
aleksandr_ershov1aleksandr_ershov1
authored and committed
Task khshanovskyi#2: Image To Text
1 parent c97cee4 commit ccffbbc

File tree

1 file changed

+19
-22
lines changed

1 file changed

+19
-22
lines changed

task/image_to_text/task_dial_itt.py

Lines changed: 19 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -12,31 +12,28 @@
1212

1313
async def _put_image(file_name: str = 'dialx-banner.png') -> Attachment:
    """Upload an image file to the DIAL bucket and describe it as an Attachment.

    The image is looked up three directory levels above this module
    (``Path(__file__).parent.parent.parent / file_name``).

    Args:
        file_name: Name of the image file, relative to that directory.

    Returns:
        An ``Attachment`` whose ``title`` is ``file_name`` and whose ``type``
        and ``url`` come from the ``contentType`` and ``url`` fields of the
        bucket's upload response.

    Raises:
        OSError: If the image file cannot be read.
        KeyError: If the upload response lacks ``contentType`` or ``url``.
    """
    import mimetypes

    image_path = Path(__file__).parent.parent.parent / file_name

    # Read the file before opening the client so a missing or unreadable image
    # fails fast, without a bucket session being opened for nothing.
    image_bytes = BytesIO(image_path.read_bytes())

    # Derive the MIME type from the file name; fall back to the previously
    # hard-coded PNG type when the extension is unknown.
    mime_type = mimetypes.guess_type(file_name)[0] or 'image/png'

    async with DialBucketClient(api_key=API_KEY, base_url=DIAL_URL) as bucket_cli:
        upload_info = await bucket_cli.put_file(name=file_name, mime_type=mime_type, content=image_bytes)
        return Attachment(title=file_name, type=upload_info['contentType'], url=upload_info['url'])
2222

2323

24-
async def start() -> None:
    """Upload sample images to the DIAL bucket and ask a model to describe them.

    Steps:
        1. Create a ``DialModelClient`` for the ``gpt-4o`` deployment.
        2. Upload every image concurrently via ``_put_image``.
        3. Send one user message that references all uploaded images as
           attachments and print the model's reply.

    The images are referenced via bucket attachments rather than embedded in
    the message content.

    Raises:
        RuntimeError: If any upload did not produce an attachment.
    """
    dial_client = DialModelClient(
        api_key=API_KEY,
        endpoint=DIAL_CHAT_COMPLETIONS_ENDPOINT,
        deployment_name="gpt-4o",
    )

    image_names = ['dialx-banner.png', '20260224151722_Image.png']

    # The uploads are independent of each other, so run them concurrently.
    attachments = await asyncio.gather(*(_put_image(name) for name in image_names))

    # Report exactly which uploads came back empty instead of a generic failure.
    failed = [name for name, attachment in zip(image_names, attachments) if not attachment]
    if failed:
        raise RuntimeError(f"Failed to upload image: {', '.join(failed)}")

    print(f"Image(s) uploaded to DIAL bucket. URLs: {[att.url for att in attachments]}")

    message = Message(
        role=Role.USER,
        content="What do you see on this picture(s)?",
        custom_content=CustomContent(attachments=attachments),
    )
    # NOTE(review): get_completion is called synchronously here, as in the
    # original; confirm it is not a coroutine that needs to be awaited.
    resp_msg = dial_client.get_completion(messages=[message])
    print(f"Model response: {resp_msg.content}")
38+
39+
# Run the demo only when this file is executed as a script, so that importing
# the module does not trigger uploads or model calls.
if __name__ == "__main__":
    asyncio.run(start())

0 commit comments

Comments
 (0)