3434 ApiEndpoint ,
3535 audio_to_base64_string ,
3636 bytesio_to_image_tensor ,
37+ download_url_to_image_tensor ,
3738 get_number_of_images ,
3839 sync_op ,
3940 tensor_to_base64_string ,
@@ -141,9 +142,11 @@ def get_parts_by_type(response: GeminiGenerateContentResponse, part_type: Litera
141142 )
142143 parts = []
143144 for part in response .candidates [0 ].content .parts :
144- if part_type == "text" and hasattr ( part , "text" ) and part .text :
145+ if part_type == "text" and part .text :
145146 parts .append (part )
146- elif hasattr (part , "inlineData" ) and part .inlineData and part .inlineData .mimeType == part_type :
147+ elif part .inlineData and part .inlineData .mimeType == part_type :
148+ parts .append (part )
149+ elif part .fileData and part .fileData .mimeType == part_type :
147150 parts .append (part )
148151 # Skip parts that don't match the requested type
149152 return parts
@@ -163,12 +166,15 @@ def get_text_from_response(response: GeminiGenerateContentResponse) -> str:
163166 return "\n " .join ([part .text for part in parts ])
164167
165168
166- def get_image_from_response (response : GeminiGenerateContentResponse ) -> Input .Image :
169+ async def get_image_from_response (response : GeminiGenerateContentResponse ) -> Input .Image :
167170 image_tensors : list [Input .Image ] = []
168171 parts = get_parts_by_type (response , "image/png" )
169172 for part in parts :
170- image_data = base64 .b64decode (part .inlineData .data )
171- returned_image = bytesio_to_image_tensor (BytesIO (image_data ))
173+ if part .inlineData :
174+ image_data = base64 .b64decode (part .inlineData .data )
175+ returned_image = bytesio_to_image_tensor (BytesIO (image_data ))
176+ else :
177+ returned_image = await download_url_to_image_tensor (part .fileData .fileUri )
172178 image_tensors .append (returned_image )
173179 if len (image_tensors ) == 0 :
174180 return torch .zeros ((1 , 1024 , 1024 , 4 ))
@@ -596,7 +602,7 @@ async def execute(
596602
597603 response = await sync_op (
598604 cls ,
599- endpoint = ApiEndpoint (path = f"{ GEMINI_BASE_ENDPOINT } /{ model } " , method = "POST" ),
605+ ApiEndpoint (path = f"/proxy/vertexai/gemini /{ model } " , method = "POST" ),
600606 data = GeminiImageGenerateContentRequest (
601607 contents = [
602608 GeminiContent (role = GeminiRole .user , parts = parts ),
@@ -610,7 +616,7 @@ async def execute(
610616 response_model = GeminiGenerateContentResponse ,
611617 price_extractor = calculate_tokens_price ,
612618 )
613- return IO .NodeOutput (get_image_from_response (response ), get_text_from_response (response ))
619+ return IO .NodeOutput (await get_image_from_response (response ), get_text_from_response (response ))
614620
615621
616622class GeminiImage2 (IO .ComfyNode ):
@@ -729,7 +735,7 @@ async def execute(
729735
730736 response = await sync_op (
731737 cls ,
732- ApiEndpoint (path = f"{ GEMINI_BASE_ENDPOINT } /{ model } " , method = "POST" ),
738+ ApiEndpoint (path = f"/proxy/vertexai/gemini /{ model } " , method = "POST" ),
733739 data = GeminiImageGenerateContentRequest (
734740 contents = [
735741 GeminiContent (role = GeminiRole .user , parts = parts ),
@@ -743,7 +749,7 @@ async def execute(
743749 response_model = GeminiGenerateContentResponse ,
744750 price_extractor = calculate_tokens_price ,
745751 )
746- return IO .NodeOutput (get_image_from_response (response ), get_text_from_response (response ))
752+ return IO .NodeOutput (await get_image_from_response (response ), get_text_from_response (response ))
747753
748754
749755class GeminiExtension (ComfyExtension ):
0 commit comments