22import requests
33import base64
44from collections .abc import Generator
5- from typing import Any , Dict , List
5+ from typing import Any , Dict , List , Optional
66
77from dify_plugin import Tool
88from dify_plugin .entities .tool import ToolInvokeMessage
1212class DoubaoTool (Tool ):
1313 """
1414 Doubao Seedream Image Generation Tool
15- Uses doubao-seedream-4-0-250828 model for high-quality Chinese image generation
15+ Uses doubao-seedream-4-0-250828 and doubao-seedream-4-5 models for high-quality Chinese image generation.
16+
17+ Supports three generation modes:
18+ - Text-to-Image: Generate images from text prompts
19+ - Image-to-Image: Transform existing images based on prompts
20+ - Sequential Generation: Create consistent series of images (storyboards, variations)
1621 """
1722
1823 # API endpoints
1924 BASE_URL = "https://aihubmix.com/v1"
20- PREDICTIONS_ENDPOINT = f"{ BASE_URL } /models/doubao/doubao-seedream-4-0-250828/predictions"
25+
26+ # Model endpoints
27+ MODEL_SEEDREAM_4_0 = "doubao-seedream-4-0-250828"
28+ MODEL_SEEDREAM_4_5 = "doubao-seedream-4-5"
29+
def get_endpoint(self, model: str) -> str:
    """Build the predictions endpoint URL for the selected Doubao model.

    Args:
        model: Model identifier (for example ``MODEL_SEEDREAM_4_0`` or
            ``MODEL_SEEDREAM_4_5``).

    Returns:
        ``<BASE_URL>/models/doubao/<model>/predictions``.
    """
    from urllib.parse import quote

    # The model name is supplied through tool parameters, so percent-encode it
    # as a single path segment: characters such as "/", "?" or "#" must not be
    # able to change which URL the request is sent to.
    model_segment = quote(str(model).strip(), safe="")
    # Tolerate a trailing slash on BASE_URL so the result never contains "//".
    base_url = self.BASE_URL.rstrip("/")
    return f"{base_url}/models/doubao/{model_segment}/predictions"
33+
34+ # Generation modes
35+ MODE_TEXT_TO_IMAGE = "text_to_image"
36+ MODE_IMAGE_TO_IMAGE = "image_to_image"
37+ MODE_SEQUENTIAL = "sequential"
2138
2239 def create_image_info (self , base64_data : str , size : str ) -> dict :
2340 mime_type = "image/png"
@@ -26,6 +43,72 @@ def create_image_info(self, base64_data: str, size: str) -> dict:
2643 "size" : size
2744 }
2845
46+ def _process_image_input (self , image_input : Any ) -> str :
47+ """
48+ Process image input from various sources and return a valid URL/data URL for API
49+
50+ Args:
51+ image_input: Can be URL string, data URL, base64 string, or Dify file object
52+
53+ Returns:
54+ Valid image URL or data URL for the API
55+ """
56+ if not image_input :
57+ raise InvokeError ("Reference image is required for Image-to-Image mode" )
58+
59+ image_str = str (image_input ).strip ()
60+
61+ # Check if it's a Dify file transfer variable or file object
62+ if isinstance (image_input , dict ):
63+ # Dify file variable format
64+ if "type" in image_input and image_input ["type" ] == "image" :
65+ # Extract URL from file object
66+ if "transfer_method" in image_input :
67+ if image_input ["transfer_method" ] == "remote_url" :
68+ return image_input .get ("url" , "" )
69+ elif image_input ["transfer_method" ] == "local_file" :
70+ # For local files, we need to use base64
71+ if "base64_data" in image_input :
72+ return f"data:image/png;base64,{ image_input ['base64_data' ]} "
73+ elif "url" in image_input :
74+ return image_input ["url" ]
75+ elif "url" in image_input :
76+ return image_input ["url" ]
77+ elif "base64_data" in image_input :
78+ return f"data:image/png;base64,{ image_input ['base64_data' ]} "
79+ # Fallback for other dict formats
80+ image_str = str (image_input )
81+
82+ # Check if text contains URL (for workflow connections where user connected text output)
83+ # This handles cases where user connected 'text' output instead of 'files' output
84+ import re
85+ url_pattern = r'https?://[^\s\)]+'
86+ urls = re .findall (url_pattern , image_str )
87+ if urls :
88+ # Return first URL found in text
89+ return urls [0 ]
90+
91+ # Already a valid URL format
92+ if image_str .startswith ('http://' ) or image_str .startswith ('https://' ):
93+ return image_str
94+
95+ # Data URL format
96+ if image_str .startswith ('data:image/' ):
97+ return image_str
98+
99+ # Base64 encoded image (with or without prefix)
100+ if image_str .startswith ('iVBORw0KGgo' ) or '/' in image_str or '=' in image_str :
101+ # Likely base64 encoded
102+ try :
103+ # Try to decode to verify it's valid base64
104+ base64 .b64decode (image_str )
105+ return f"data:image/png;base64,{ image_str } "
106+ except :
107+ pass
108+
109+ # If none of the above, try as file path or return as-is
110+ return image_str
111+
29112 def _invoke (self , tool_parameters : dict [str , Any ]) -> Generator [ToolInvokeMessage ]:
30113 """
31114 Main invoke method for Doubao Seedream image generation
@@ -36,12 +119,41 @@ def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessag
36119 if not prompt :
37120 raise InvokeError ("Prompt is required" )
38121
39- size = tool_parameters .get ("size" , "2K" )
122+ # Generation mode
123+ generation_mode = tool_parameters .get ("generation_mode" , self .MODE_TEXT_TO_IMAGE )
124+
125+ # Image-to-image parameters
126+ reference_image_input = tool_parameters .get ("reference_image" , "" )
127+ image_strength = float (tool_parameters .get ("image_strength" , 0.5 ))
128+
129+ # Sequential generation options
40130 sequential_image_generation = tool_parameters .get ("sequential_image_generation" , "disabled" )
131+ max_sequential_images = int (tool_parameters .get ("max_sequential_images" , 4 ))
132+
133+ # Model selection
134+ model = tool_parameters .get ("model" , self .MODEL_SEEDREAM_4_0 )
135+
136+ # Common parameters
137+ size = tool_parameters .get ("size" , "2K" )
41138 stream = tool_parameters .get ("stream" , False )
42139 response_format = tool_parameters .get ("response_format" , "url" )
43140 watermark = tool_parameters .get ("watermark" , True )
44141
142+ # Validate parameters based on mode
143+ if generation_mode == self .MODE_IMAGE_TO_IMAGE :
144+ if not reference_image_input :
145+ raise InvokeError ("Reference image is required for Image-to-Image mode" )
146+
147+ # Process reference image if in image-to-image mode
148+ reference_image = ""
149+ if generation_mode == self .MODE_IMAGE_TO_IMAGE :
150+ reference_image = self ._process_image_input (reference_image_input )
151+
152+ if sequential_image_generation == "enabled" and max_sequential_images < 1 :
153+ raise InvokeError ("Max sequential images must be at least 1" )
154+ if sequential_image_generation == "enabled" and max_sequential_images > 8 :
155+ raise InvokeError ("Max sequential images cannot exceed 8" )
156+
45157 # Get API key from credentials
46158 api_key = self .runtime .credentials .get ("api_key" )
47159 if not api_key :
@@ -53,23 +165,49 @@ def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessag
53165 "Content-Type" : "application/json"
54166 }
55167
56- # Prepare request payload for Doubao Seedream according to API documentation
57- payload = {
58- "input" : {
59- "prompt" : prompt ,
60- "size" : size ,
61- "sequential_image_generation" : sequential_image_generation ,
62- "stream" : stream ,
63- "response_format" : response_format ,
64- "watermark" : watermark
168+ # Prepare request payload based on generation mode
169+ input_payload = {
170+ "prompt" : prompt ,
171+ "size" : size ,
172+ "sequential_image_generation" : sequential_image_generation ,
173+ "stream" : stream ,
174+ "response_format" : response_format ,
175+ "watermark" : watermark
176+ }
177+
178+ # Add mode-specific parameters
179+ if generation_mode == self .MODE_IMAGE_TO_IMAGE :
180+ input_payload ["image" ] = reference_image
181+ input_payload ["image_strength" ] = image_strength
182+
183+ if sequential_image_generation == "enabled" :
184+ input_payload ["sequential_image_generation_options" ] = {
185+ "max_images" : max_sequential_images
65186 }
187+
188+ payload = {"input" : input_payload }
189+
190+ # Generate status message based on mode
191+ mode_descriptions = {
192+ self .MODE_TEXT_TO_IMAGE : "文生图" ,
193+ self .MODE_IMAGE_TO_IMAGE : "图生图" ,
194+ self .MODE_SEQUENTIAL : "组图输出"
66195 }
196+ mode_name = mode_descriptions .get (generation_mode , generation_mode )
67197
68- yield self .create_text_message (f"Generating image with Doubao Seedream ({ size } size)..." )
198+ if generation_mode == self .MODE_IMAGE_TO_IMAGE :
199+ # Truncate for display
200+ display_ref = reference_image [:50 ] if len (reference_image ) > 50 else reference_image
201+ yield self .create_text_message (f"使用 Doubao Seedream 进行{ mode_name } ,参考图: { display_ref } ..." )
202+ elif generation_mode == self .MODE_SEQUENTIAL :
203+ yield self .create_text_message (f"使用 Doubao Seedream 进行{ mode_name } ,生成 { max_sequential_images } 张连贯图像..." )
204+ else :
205+ yield self .create_text_message (f"使用 Doubao Seedream 进行{ mode_name } ({ size } )..." )
69206
70- # Make API request
207+ # Make API request with dynamic endpoint
208+ endpoint = self .get_endpoint (model )
71209 response = requests .post (
72- self . PREDICTIONS_ENDPOINT ,
210+ endpoint ,
73211 headers = headers ,
74212 json = payload ,
75213 timeout = 60
@@ -115,7 +253,7 @@ def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessag
115253 # Return results as JSON
116254 yield self .create_json_message ({
117255 "success" : True ,
118- "model" : "doubao/doubao-seedream-4-0-250828 " ,
256+ "model" : f "doubao/{ model } " ,
119257 "prompt" : prompt ,
120258 "num_images" : len (images ),
121259 "images" : images ,
@@ -127,8 +265,8 @@ def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessag
127265 })
128266
129267 # Also create text message with image URLs
130- image_urls = "\n " .join ([f"- { img ['url' ]} " for img in images ])
131- yield self .create_text_message (f"Doubao Seedream generated { len ( images ) } image(s): \n { image_urls } " )
268+ image_urls = "\n " .join ([img ['url' ] for img in images ])
269+ yield self .create_text_message (image_urls )
132270
133271 except Exception as e :
134272 raise InvokeError (f"Doubao Seedream image generation failed: { str (e )} " )
0 commit comments