-
Notifications
You must be signed in to change notification settings - Fork 1.6k
Expand file tree
/
Copy pathollama_chat_multimodal.py
More file actions
57 lines (38 loc) · 1.53 KB
/
ollama_chat_multimodal.py
File metadata and controls
57 lines (38 loc) · 1.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# Copyright (c) Microsoft. All rights reserved.
import asyncio
from agent_framework import Content, Message
from agent_framework.ollama import OllamaChatClient
from dotenv import load_dotenv
# Load environment variables from .env file
# (e.g. OLLAMA_MODEL, picked up by OllamaChatClient when it is constructed).
load_dotenv()
# NOTE(review): this bare string is NOT the module docstring — it appears after
# the imports and the load_dotenv() call, so it is a no-op expression at runtime.
# Consider moving it to the very top of the file if __doc__ matters.
"""
Ollama Agent Multimodal Example
This sample demonstrates implementing a Ollama agent with multimodal input capabilities.
Ensure to install Ollama and have a model running locally before running the sample
Not all Models support multimodal input, to test multimodal input try gemma3:4b
Set the model to use via the OLLAMA_MODEL environment variable or modify the code below.
https://ollama.com/
"""
def create_sample_image() -> str:
    """Return a ``data:`` URI containing a minimal 1x1 red-pixel PNG.

    Used as a tiny, dependency-free image payload for exercising the
    model's multimodal input path.
    """
    # Base64 payload of a single red pixel encoded as PNG.
    pixel_b64 = (
        "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8"
        "/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg=="
    )
    return "data:image/png;base64," + pixel_b64
async def test_image() -> None:
    """Send a text prompt plus an inline PNG to a local Ollama model and print the reply.

    Requires a running Ollama server with a multimodal-capable model
    (configured via the OLLAMA_MODEL environment variable).
    """
    chat_client = OllamaChatClient()

    # One user message carrying both a text part and an image part.
    request = Message(
        role="user",
        contents=[
            Content.from_text(text="What's in this image?"),
            Content.from_uri(uri=create_sample_image(), media_type="image/png"),
        ],
    )

    reply = await chat_client.get_response([request])
    print(f"Image Response: {reply}")
async def main() -> None:
    """Entry point: announce and run the multimodal image sample."""
    print("=== Testing Ollama Multimodal ===")
    await test_image()


if __name__ == "__main__":
    # Drive the async entry point with a fresh event loop.
    asyncio.run(main())