Skip to content

Commit 4d17031

Browse files
committed
fix: vision working
1 parent 907ef8a commit 4d17031

File tree

4 files changed

+118
-6
lines changed

4 files changed

+118
-6
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,5 +53,6 @@ examples/chat/chat
5353
examples/tool_calling/tool_calling
5454
examples/embeddings/embeddings
5555
examples/streaming/streaming
56+
examples/vision/vision
5657
.genkit/
5758
node_modules/

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -392,6 +392,7 @@ The repository includes comprehensive examples:
392392
- **`examples/chat/`** - Multi-turn conversation with context
393393
- **`examples/embeddings/`** - Text embeddings generation
394394
- **`examples/tool_calling/`** - Function calling with multiple tools
395+
- **`examples/vision/`** - Multimodal image analysis
395396

396397
### Running Examples
397398

@@ -415,6 +416,10 @@ go run main.go
415416
# Run tool calling example
416417
cd ../tool_calling
417418
go run main.go
419+
420+
# Run vision example
421+
cd ../vision
422+
go run main.go
418423
```
419424

420425
## Features in Detail

azureaifoundry_plugin.go

Lines changed: 59 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,24 @@ func (a *AzureAIFoundry) generateText(ctx context.Context, modelName string, inp
188188
return a.generateTextSync(ctx, params, input)
189189
}
190190

191+
// hasMultimodalContent checks if a message contains multimodal content (text + images)
192+
func (a *AzureAIFoundry) hasMultimodalContent(msg *ai.Message) bool {
193+
hasText := false
194+
hasMedia := false
195+
196+
for _, part := range msg.Content {
197+
if part.IsText() {
198+
hasText = true
199+
}
200+
if part.IsMedia() {
201+
hasMedia = true
202+
}
203+
}
204+
205+
// Return true if it has media, or if it has multiple parts (regardless of media)
206+
return hasMedia || (hasText && len(msg.Content) > 1)
207+
}
208+
191209
// convertMessagesToOpenAI converts Genkit messages to OpenAI message format
192210
func (a *AzureAIFoundry) convertMessagesToOpenAI(messages []*ai.Message) []openai.ChatCompletionMessageParamUnion {
193211
var openAIMessages []openai.ChatCompletionMessageParamUnion
@@ -207,13 +225,48 @@ func (a *AzureAIFoundry) convertMessagesToOpenAI(messages []*ai.Message) []opena
207225
},
208226
})
209227
case ai.RoleUser:
210-
openAIMessages = append(openAIMessages, openai.ChatCompletionMessageParamUnion{
211-
OfUser: &openai.ChatCompletionUserMessageParam{
212-
Content: openai.ChatCompletionUserMessageParamContentUnion{
213-
OfString: openai.String(msg.Content[0].Text),
228+
// Use a content-parts array when the message has media, or text alongside other parts; otherwise send a plain string
229+
if a.hasMultimodalContent(msg) {
230+
// Handle multimodal content with array of content parts
231+
var contentParts []openai.ChatCompletionContentPartUnionParam
232+
233+
for _, part := range msg.Content {
234+
if part.IsText() {
235+
contentParts = append(contentParts, openai.ChatCompletionContentPartUnionParam{
236+
OfText: &openai.ChatCompletionContentPartTextParam{
237+
Text: part.Text,
238+
},
239+
})
240+
} else if part.IsMedia() {
241+
// Handle image/media content
242+
// Media parts store the URL in the Text field
243+
contentParts = append(contentParts, openai.ChatCompletionContentPartUnionParam{
244+
OfImageURL: &openai.ChatCompletionContentPartImageParam{
245+
ImageURL: openai.ChatCompletionContentPartImageImageURLParam{
246+
URL: part.Text,
247+
},
248+
},
249+
})
250+
}
251+
}
252+
253+
openAIMessages = append(openAIMessages, openai.ChatCompletionMessageParamUnion{
254+
OfUser: &openai.ChatCompletionUserMessageParam{
255+
Content: openai.ChatCompletionUserMessageParamContentUnion{
256+
OfArrayOfContentParts: contentParts,
257+
},
214258
},
215-
},
216-
})
259+
})
260+
} else {
261+
// Simple text-only message
262+
openAIMessages = append(openAIMessages, openai.ChatCompletionMessageParamUnion{
263+
OfUser: &openai.ChatCompletionUserMessageParam{
264+
Content: openai.ChatCompletionUserMessageParamContentUnion{
265+
OfString: openai.String(msg.Content[0].Text),
266+
},
267+
},
268+
})
269+
}
217270
case ai.RoleModel:
218271
// Extract all content parts and tool requests
219272
var textContent string

examples/vision/main.go

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
package main
2+
3+
import (
4+
"context"
5+
"log"
6+
7+
"github.com/firebase/genkit/go/ai"
8+
"github.com/firebase/genkit/go/genkit"
9+
azureaifoundry "github.com/xavidop/genkit-azure-foundry-go"
10+
"github.com/xavidop/genkit-azure-foundry-go/examples/common"
11+
)
12+
13+
func main() {
14+
ctx := context.Background()
15+
16+
// Setup Genkit with Azure AI Foundry
17+
g, azurePlugin, err := common.SetupGenkit(ctx, nil)
18+
if err != nil {
19+
log.Fatalf("Failed to setup Genkit: %v", err)
20+
}
21+
22+
// Define a GPT-5 model with vision support
23+
gpt5Model := azurePlugin.DefineModel(g, azureaifoundry.ModelDefinition{
24+
Name: "gpt-5", // Your deployment name in Azure
25+
Type: "chat",
26+
SupportsVision: true,
27+
}, nil)
28+
29+
log.Println("Starting Vision (Multimodal) example...")
30+
log.Println("This example demonstrates how to analyze images")
31+
log.Println()
32+
33+
// Analyze an image from a URL
34+
log.Println("===Analyzing an image from URL ===")
35+
imageURL := "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
36+
37+
response1, err := genkit.Generate(ctx, g,
38+
ai.WithModel(gpt5Model),
39+
ai.WithMessages(ai.NewUserMessage(
40+
ai.NewTextPart("What's in this image? Describe it in detail."),
41+
ai.NewMediaPart("image/jpeg", imageURL),
42+
)),
43+
)
44+
45+
if err != nil {
46+
log.Printf("Error analyzing image from URL: %v", err)
47+
} else {
48+
log.Printf("Response: %s\n", response1.Text())
49+
}
50+
log.Println()
51+
52+
log.Println("Vision example completed!")
53+
}

0 commit comments

Comments
 (0)