1
+ import base64
1
2
import logging
2
3
from abc import ABC
3
- import base64
4
4
5
5
import aiohttp
6
6
from azure .core .credentials_async import AsyncTokenCredential
7
7
from azure .identity .aio import get_bearer_token_provider
8
+ from openai import AsyncOpenAI
8
9
from rich .progress import Progress
9
10
from tenacity import retry , retry_if_exception_type , stop_after_attempt , wait_fixed
10
- from openai import AsyncOpenAI
11
11
12
12
# Module-level logger. NOTE(review): named "scripts" rather than __name__ —
# presumably shared across the ingestion scripts package; confirm against the
# logging configuration before renaming.
logger = logging .getLogger ("scripts" )
13
13
@@ -107,12 +107,13 @@ async def describe_image(self, image_bytes: bytes) -> str:
107
107
fields = results ["result" ]["contents" ][0 ]["fields" ]
108
108
return fields ["Description" ]["valueString" ]
109
109
110
+
110
111
class MultimodalModelDescriber (MediaDescriber ):
111
112
def __init__(self, openai_client: AsyncOpenAI, model: str, deployment: str):
    """Build a describer that captions images through a multimodal chat model.

    Args:
        openai_client: Async OpenAI client used for chat-completion requests.
        model: Name of the multimodal model to invoke.
        deployment: Deployment identifier (presumably an Azure OpenAI
            deployment name — confirm with callers).
    """
    # Only store the collaborators here; no network I/O is performed
    # until describe_image() is invoked.
    self.deployment = deployment
    self.model = model
    self.openai_client = openai_client
115
-
116
+
116
117
async def describe_image (self , image_bytes : bytes ) -> str :
117
118
image_base64 = base64 .b64encode (image_bytes ).decode ("utf-8" )
118
119
image_datauri = f"data:image/png;base64,{ image_base64 } "
@@ -127,11 +128,15 @@ async def describe_image(self, image_bytes: bytes) -> str:
127
128
},
128
129
{
129
130
"role" : "user" ,
130
- "content" :
131
- [{"text" : "Describe image with no more than 5 sentences. Do not speculate about anything you don't know." , "type" : "text" },
132
- {"image_url" : {"url" : image_datauri }, "type" : "image_url" , "detail" : "auto" }]
133
- }
134
- ])
131
+ "content" : [
132
+ {
133
+ "text" : "Describe image with no more than 5 sentences. Do not speculate about anything you don't know." ,
134
+ "type" : "text" ,
135
+ },
136
+ {"image_url" : {"url" : image_datauri }, "type" : "image_url" , "detail" : "auto" },
137
+ ],
138
+ },
139
+ ],
140
+ )
135
141
description = response .choices [0 ].message .content .strip () if response .choices else ""
136
142
return description
137
-
0 commit comments