@@ -55,9 +55,9 @@ def auto_complete_config(auto_complete_model_config):
         inputs = [
             {"name": "text_input", "data_type": "TYPE_STRING", "dims": [1]},
             {
-                "name": "multi_modal_data",
+                "name": "image",
                 "data_type": "TYPE_STRING",
-                "dims": [1],
+                "dims": [-1],  # can be multiple images as separate elements
                 "optional": True,
             },
             {
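For context, a minimal client-side sketch of how the renamed `image` input could be populated. The model name, server address, and output name below are assumptions, not taken from this diff, and the call assumes a non-decoupled deployment (a streaming deployment would use `stream_infer` instead). Because `dims` is now `[-1]`, several base64-encoded images can travel as separate elements of one BYTES tensor:

```python
# Hypothetical client sketch; "vllm_model", "localhost:8001", and
# "text_output" are assumptions, not taken from this diff.
import base64

import numpy as np
import tritonclient.grpc as grpcclient


def to_b64(path):
    """Read an image file and return its base64 encoding as a str."""
    with open(path, "rb") as f:
        return base64.b64encode(f.read()).decode("utf-8")


client = grpcclient.InferenceServerClient("localhost:8001")

text = grpcclient.InferInput("text_input", [1], "BYTES")
text.set_data_from_numpy(np.array(["Describe both images."], dtype=object))

# dims [-1] lets multiple images be sent as separate tensor elements.
images = np.array([to_b64("a.jpg"), to_b64("b.jpg")], dtype=object)
image = grpcclient.InferInput("image", [len(images)], "BYTES")
image.set_data_from_numpy(images)

result = client.infer("vllm_model", inputs=[text, image])
print(result.as_numpy("text_output"))
```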
@@ -394,20 +394,16 @@ async def generate(self, request):
         if isinstance(prompt, bytes):
             prompt = prompt.decode("utf-8")

-        multi_modal_data_input_tensor = pb_utils.get_input_tensor_by_name(
-            request, "multi_modal_data"
+        image_input_tensor = pb_utils.get_input_tensor_by_name(
+            request, "image"
         )
-        if multi_modal_data_input_tensor:
-            multi_modal_data = multi_modal_data_input_tensor.as_numpy()[0].decode("utf-8")
-            multi_modal_data = json.loads(multi_modal_data)
-            if "image" in multi_modal_data:
-                image_list = []
-                for image_base64_string in multi_modal_data["image"]:
-                    if "base64," in image_base64_string:
-                        image_base64_string = image_base64_string.split("base64,")[-1]
-                    image_data = base64.b64decode(image_base64_string)
-                    image = Image.open(BytesIO(image_data)).convert("RGB")
-                    image_list.append(image)
+        if image_input_tensor:
+            image_list = []
+            for image_raw in image_input_tensor.as_numpy():
+                image_data = base64.b64decode(image_raw.decode("utf-8"))
+                image = Image.open(BytesIO(image_data)).convert("RGB")
+                image_list.append(image)
+            if len(image_list) > 0:
                 prompt = {
                     "prompt": prompt,
                     "multi_modal_data": {
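As a standalone illustration of the new decode path (a minimal sketch, not the backend code itself; `pb_utils` and the request object are mocked away), each element of the `image` tensor is treated as one base64-encoded image:

```python
# Minimal sketch of the new per-element decode path (assumption: each
# element of the "image" BYTES tensor holds one base64-encoded image).
import base64
from io import BytesIO

import numpy as np
from PIL import Image


def decode_images(image_array: np.ndarray) -> list:
    """Decode a BYTES tensor of base64 strings into RGB PIL images."""
    image_list = []
    for image_raw in image_array:
        image_data = base64.b64decode(image_raw.decode("utf-8"))
        image_list.append(Image.open(BytesIO(image_data)).convert("RGB"))
    return image_list


# Example: round-trip a tiny generated image through the decode path.
buf = BytesIO()
Image.new("RGB", (2, 2), "red").save(buf, format="PNG")
b64 = base64.b64encode(buf.getvalue())
images = decode_images(np.array([b64], dtype=object))
print(images[0].size)  # (2, 2)
```

Note that, unlike the removed JSON-based path, the per-element decode assumes raw base64 with no `data:...;base64,` URI prefix, which the old code used to strip.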