@@ -188,6 +188,24 @@ func (a *AzureAIFoundry) generateText(ctx context.Context, modelName string, inp
188188 return a .generateTextSync (ctx , params , input )
189189}
190190
191+ // hasMultimodalContent checks if a message contains multimodal content (text + images)
192+ func (a * AzureAIFoundry ) hasMultimodalContent (msg * ai.Message ) bool {
193+ hasText := false
194+ hasMedia := false
195+
196+ for _ , part := range msg .Content {
197+ if part .IsText () {
198+ hasText = true
199+ }
200+ if part .IsMedia () {
201+ hasMedia = true
202+ }
203+ }
204+
205+ // Return true if it has media, or if it has multiple parts (regardless of media)
206+ return hasMedia || (hasText && len (msg .Content ) > 1 )
207+ }
208+
191209// convertMessagesToOpenAI converts Genkit messages to OpenAI message format
192210func (a * AzureAIFoundry ) convertMessagesToOpenAI (messages []* ai.Message ) []openai.ChatCompletionMessageParamUnion {
193211 var openAIMessages []openai.ChatCompletionMessageParamUnion
@@ -207,13 +225,48 @@ func (a *AzureAIFoundry) convertMessagesToOpenAI(messages []*ai.Message) []opena
207225 },
208226 })
209227 case ai .RoleUser :
210- openAIMessages = append (openAIMessages , openai.ChatCompletionMessageParamUnion {
211- OfUser : & openai.ChatCompletionUserMessageParam {
212- Content : openai.ChatCompletionUserMessageParamContentUnion {
213- OfString : openai .String (msg .Content [0 ].Text ),
228+ // Check if message contains multimodal content (text + images)
229+ if a .hasMultimodalContent (msg ) {
230+ // Handle multimodal content with array of content parts
231+ var contentParts []openai.ChatCompletionContentPartUnionParam
232+
233+ for _ , part := range msg .Content {
234+ if part .IsText () {
235+ contentParts = append (contentParts , openai.ChatCompletionContentPartUnionParam {
236+ OfText : & openai.ChatCompletionContentPartTextParam {
237+ Text : part .Text ,
238+ },
239+ })
240+ } else if part .IsMedia () {
241+ // Handle image/media content
242+ // Media parts store the URL in the Text field
243+ contentParts = append (contentParts , openai.ChatCompletionContentPartUnionParam {
244+ OfImageURL : & openai.ChatCompletionContentPartImageParam {
245+ ImageURL : openai.ChatCompletionContentPartImageImageURLParam {
246+ URL : part .Text ,
247+ },
248+ },
249+ })
250+ }
251+ }
252+
253+ openAIMessages = append (openAIMessages , openai.ChatCompletionMessageParamUnion {
254+ OfUser : & openai.ChatCompletionUserMessageParam {
255+ Content : openai.ChatCompletionUserMessageParamContentUnion {
256+ OfArrayOfContentParts : contentParts ,
257+ },
214258 },
215- },
216- })
259+ })
260+ } else {
261+ // Simple text-only message
262+ openAIMessages = append (openAIMessages , openai.ChatCompletionMessageParamUnion {
263+ OfUser : & openai.ChatCompletionUserMessageParam {
264+ Content : openai.ChatCompletionUserMessageParamContentUnion {
265+ OfString : openai .String (msg .Content [0 ].Text ),
266+ },
267+ },
268+ })
269+ }
217270 case ai .RoleModel :
218271 // Extract all content parts and tool requests
219272 var textContent string
0 commit comments