1212# See the License for the specific language governing permissions and
1313# limitations under the License.
1414
15+ import math
1516import time
1617from typing import Any , List
1718
19+ from .....utils import logging
1820from .....utils .deps import function_requires_deps , is_dep_available
1921from ...infra import utils as serving_utils
2022from ...infra .config import AppConfig
3537 from openai .types .chat import ChatCompletion
3638 from openai .types .chat .chat_completion import Choice as ChatCompletionChoice
3739 from openai .types .chat .chat_completion_message import ChatCompletionMessage
40+ if is_dep_available ("pillow" ):
41+ from PIL import Image
3842
3943
40- @function_requires_deps ("fastapi" , "openai" )
44+ @function_requires_deps ("fastapi" , "openai" , "pillow" )
4145def create_pipeline_app (pipeline : Any , app_config : AppConfig ) -> "FastAPI" :
4246 app , ctx = create_app (
4347 pipeline = pipeline , app_config = app_config , app_aiohttp_session = True
@@ -55,6 +59,30 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> "FastAPI":
5559 )
5660 async def _infer (request : InferRequest ) -> "ChatCompletion" :
5761 pipeline = ctx .pipeline
62+ aiohttp_session = ctx .aiohttp_session
63+
def _resize_image_with_token_limit(image, max_token_num=2200, tile_size=28):
    """Downscale an image so its estimated token count fits a budget.

    The token count is estimated as the number of ``tile_size`` x
    ``tile_size`` tiles needed to cover the image, i.e.
    ``ceil(width / tile_size) * ceil(height / tile_size)``.

    Args:
        image: Array accepted by ``PIL.Image.fromarray``.
        max_token_num: Upper bound on the estimated token count.
        tile_size: Edge length, in pixels, of one tile.

    Returns:
        A ``PIL.Image.Image``. When the input already fits the budget it
        is returned converted but not resized; otherwise a resized copy
        is returned whose estimated token count does not exceed
        ``max_token_num`` (as long as ``max_token_num`` is at least 1,
        since the result never has fewer than one tile).
    """

    def _tiles(length):
        # Number of tiles needed to cover `length` pixels along one axis.
        return math.ceil(length / tile_size)

    image = Image.fromarray(image)
    w0, h0 = image.width, image.height
    tokens = _tiles(w0) * _tiles(h0)
    if tokens <= max_token_num:
        return image

    # tokens > max_token_num on this path, so the scale factor is below 1.
    k = math.sqrt(max_token_num / tokens)
    # Keep at least one tile per side. NOTE: a side that was already
    # shorter than tile_size is enlarged to tile_size by this clamp.
    w_new = max(int(w0 * k), tile_size)
    h_new = max(int(h0 * k), tile_size)

    # The scale factor is derived from the real-valued tile ratio, but the
    # token estimate rounds every axis up, so the scaled size can still be
    # over budget (e.g. 140x140 with a limit of 6 scales to 68x68, which is
    # 3 * 3 = 9 tiles). Trim whole tiles until the estimate fits, always
    # taking the tile from the axis whose last tile holds fewer pixels so
    # that as little of the image as possible is squeezed away.
    cols, rows = _tiles(w_new), _tiles(h_new)
    while cols * rows > max_token_num and (cols > 1 or rows > 1):
        w_last = w_new - (cols - 1) * tile_size
        h_last = h_new - (rows - 1) * tile_size
        if rows == 1 or (cols > 1 and w_last <= h_last):
            cols -= 1
            w_new = cols * tile_size
        else:
            rows -= 1
            h_new = rows * tile_size

    resized_image = image.resize((w_new, h_new))
    tokens_new = cols * rows
    logging.info(
        f"Resizing image from {w0} x{h0} to {w_new} x{h_new} , "
        f"which will reduce the image tokens from {tokens} to {tokens_new} ."
    )

    return resized_image
5886
5987 def _process_messages (messages : List [Message ]):
6088 system_message = ""
@@ -88,9 +116,20 @@ def _process_messages(messages: List[Message]):
88116 return system_message , user_message , image_url
89117
90118 system_message , user_message , image_url = _process_messages (request .messages )
119+ if request .max_image_tokens is not None :
120+ if image_url .startswith ("data:image" ):
121+ _ , image_url = image_url .split ("," , 1 )
122+ img_bytes = await serving_utils .get_raw_bytes_async (
123+ image_url , aiohttp_session
124+ )
125+ image = serving_utils .image_bytes_to_array (img_bytes )
126+ image = _resize_image_with_token_limit (image , request .max_image_tokens )
127+ else :
128+ image = image_url
129+
91130 result = (
92131 await pipeline .infer (
93- {"image" : image_url , "query" : user_message },
132+ {"image" : image , "query" : user_message },
94133 )
95134 )[0 ]
96135
0 commit comments