1919import asyncio
2020import collections
2121import time
22+ import json
2223import uvloop
24+ import requests
25+ import base64
2326import os
27+ from io import BytesIO
2428import pickle
2529from .build_prompt import build_prompt , init_tokenizer
2630
2731asyncio .set_event_loop_policy (uvloop .EventLoopPolicy ())
2832import ujson as json
2933from http import HTTPStatus
3034import uuid
35+ from PIL import Image
3136import multiprocessing as mp
3237from typing import AsyncGenerator , Union
3338from typing import Callable
4045from .httpserver_for_pd_master .manager import HttpServerManagerForPDMaster
4146from .api_lightllm import lightllm_get_score , lightllm_pd_generate_stream
4247from lightllm .utils .envs_utils import get_env_start_args
48+ from lightllm .server .embed_cache .utils import image2base64
4349
4450from .api_models import (
4551 ChatCompletionRequest ,
@@ -230,6 +236,38 @@ async def chat_completions(request: ChatCompletionRequest, raw_request: Request)
230236 return create_error_response (HTTPStatus .BAD_REQUEST , "The function call feature is not supported" )
231237
232238 created_time = int (time .time ())
239+
240+ multimodal_params_dict = {"images" : []}
241+ for message in request .messages :
242+ if isinstance (message .content , list ):
243+ texts = []
244+ for content in message .content :
245+ if content .type == 'text' and content .text :
246+ texts .append (content .text )
247+ elif content .type == 'image_url' and content .image_url is not None :
248+ img = content .image_url .url
249+ if img .startswith ("http://" ) or img .startswith ("https://" ):
250+ response = requests .get (img , stream = True , timeout = 2 )
251+ data = image2base64 (response .raw )
252+ elif img .startswith ("file://" ):
253+ data = image2base64 (img [7 :])
254+ elif img .startswith ("data:image" ):
255+ # "data:image/jpeg;base64,{base64_image}"
256+ data_str = img .split (";" , 1 )[1 ]
257+ if data_str .startswith ("base64," ):
258+ data = data_str [7 :]
259+ else :
260+ raise ValueError ("Unrecognized image input." )
261+ else :
262+ raise ValueError ("Unrecognized image input. Supports local path, http url, base64, and PIL.Image." )
263+
264+ multimodal_params_dict ["images" ].append ({
265+ "type" : "base64" ,
266+ "data" : data
267+ })
268+
269+ message .content = "\n " .join (texts )
270+
233271 prompt = await build_prompt (request )
234272 sampling_params_dict = {
235273 "do_sample" : request .do_sample ,
@@ -249,7 +287,7 @@ async def chat_completions(request: ChatCompletionRequest, raw_request: Request)
249287 sampling_params .init (tokenizer = g_objs .httpserver_manager .tokenizer , ** sampling_params_dict )
250288
251289 sampling_params .verify ()
252- multimodal_params = MultimodalParams (images = [] )
290+ multimodal_params = MultimodalParams (** multimodal_params_dict )
253291
254292 results_generator = g_objs .httpserver_manager .generate (
255293 prompt , sampling_params , multimodal_params , request = raw_request
0 commit comments