@@ -40,6 +40,11 @@ cd ./Dataflow-MM
 conda create -n Dataflow-MM python=3.12
 pip install -e .
 ```
+Next, run the initialization (**a very important step, be sure to execute it**):
+```bash
+dataflowmm init
+cd gpu_pipelines/
+```

 ### Step 2: Prepare the Editing Data
 We use a `jsonl` file to record the data; a simple input example looks like the following:
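The sample record itself sits outside this hunk. As a rough sketch, assuming the input uses the same fields as the editing example shown further down in this diff (`conversations`, `images`, and an initially empty `edited_images`), a record could look like this; the prompt text and `image1.png` path are made up for illustration:

```jsonl
{
  "conversations": [{"content": "Make the sky look like a sunset over the harbor.", "role": "user"}],
  "images": ["../example_data/image_gen/image_edit/image1.png"],
  "edited_images": [""]
}
```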
@@ -62,40 +67,17 @@ storage = FileStorage(
 ### Step 3: Run the Pipeline
 You can run the image-editing pipeline with the following commands.
 ```bash
-python /path/to/DataFlow-MM/test/test_image_editing.py --serving_type 'local'
+python image_editing_pipeline.py --serving_type 'local'
 ```
 or
 ```bash
-python /path/to/DataFlow-MM/test/test_image_editing.py --api_key \<your_key\> --api_url \<your_url\>
+export DF_API_KEY=<api_key>
+python image_editing_pipeline.py --api_url <your_url>
 ```

 ## 3. Pipeline Logic
 ### 3.1 Prompt Design
-Initialize the prompt generator with an online LLM (e.g., gpt-4o):
-```python
-from dataflow.operators.image_generation import PromptedT2ITextGenerator
-
-os.environ["DF_API_KEY"] = api_key
-serving = APILLMServing_request(
-    api_url=api_url,
-    model_name="gpt-4o",
-    max_workers=5,
-)
-
-text_to_image_sample_generator = PromptedT2ITextGenerator(
-    llm_serving=serving,
-)
-```
-Run the prompt generator:
-```python
-text_to_image_sample_generator.run(
-    storage=storage.step(),
-    input_style_key="input_style",
-    input_prompt_key="input_text",
-    output_prompt_key="instruction",
-    output_prompt_key_2="output_img_discript",
-)
-```
+Use a local or online LLM (e.g., gpt-4o), together with the configured data mixing ratios, to construct image-generation samples.

 ### 3.2 Obtaining Image Conditions
 We use a text-to-image generation model; the text-to-image generator is initialized as follows:
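The initialization code itself is unchanged by this commit and does not appear in this hunk. Purely for reference, a minimal sketch assembled from the multi-image pipeline example later in this diff, using `LocalImageGenServing` and `PromptedImageGenerator`, might look like the following; the `black-forest-labs/FLUX.1-dev` checkpoint and the `./cache_local/image_editing` cache path are assumptions taken from comments and defaults elsewhere in this diff:

```python
import os
from dataflow.operators.core_vision import PromptedImageGenerator
from dataflow.serving.local_image_gen_serving import LocalImageGenServing
from dataflow.io import ImageIO

# Local text-to-image serving backed by a diffusion checkpoint
# ("black-forest-labs/FLUX.1-dev" is the checkpoint suggested in the example below).
t2i_serving = LocalImageGenServing(
    image_io=ImageIO(save_path=os.path.join("./cache_local/image_editing", "condition_images")),
    batch_size=4,
    hf_model_name_or_path="black-forest-labs/FLUX.1-dev",
    hf_cache_dir="./cache_local",
    hf_local_dir="./ckpt/models/",
)

# Operator that turns the text prompts into condition images.
text_to_image_generator = PromptedImageGenerator(
    t2i_serving=t2i_serving,
)
```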
@@ -176,7 +158,7 @@ self.generator.run(
 ```jsonl
 {
   "conversations": [{"content": "The woman is dancing with the prince in a sacred ballroom.", "role": "user"}],
-  "images": ["./dataflow/example/test_image_editing/images/image3.png"],
+  "images": ["../example_data/image_gen/image_edit/image3.png"],
   "edited_images": [""]
 }
 ```
@@ -185,15 +167,17 @@ self.generator.run(
 Both local editing models and online editing models are supported for generation; pipelines that take multiple images as input are also supported:
 - Image-editing pipeline with a local editing model
   ```bash
-  python /path/to/DataFlow-MM/test/test_image_editing.py --serving_type 'local'
+  python /path/to/DataFlow-MM/test/image_editing_pipeline.py --serving_type 'local'
   ```
 - Image-editing pipeline with an online editing model
   ```bash
-  python /path/to/DataFlow-MM/test/test_image_editing.py --api_key <your_key> --api_url <your_url>
+  export DF_API_KEY=<api_key>
+  python /path/to/DataFlow-MM/test/image_editing_pipeline.py --api_url <your_url>
   ```
 - Image-editing pipeline with multiple input images
   ```bash
-  python /path/to/DataFlow-MM/test/test_multi_images_to_image_generation.py --api_key <your_key> --api_url <your_url>
+  export DF_API_KEY=<api_key>
+  python /path/to/DataFlow-MM/test/multi_images_to_image_generation_pipeline.py --api_url <your_url>
   ```

 ## 6. Pipeline Examples
@@ -208,8 +192,8 @@ self.generator.run(
   class ImageGenerationPipeline():
       def __init__(self, serving_type="local", api_key="", api_url="http://123.129.219.111:3000/v1/"):
           self.storage = FileStorage(
-              first_entry_file_name="./dataflow/example/image_gen/image_edit/prompts.jsonl",
-              cache_path="./cache_local/multi2single_image_gen",
+              first_entry_file_name="../example_data/image_gen/image_edit/prompts.jsonl",
+              cache_path="./cache_local/image_editing",
               file_name_prefix="dataflow_cache_step",
               cache_type="jsonl"
           )
@@ -251,8 +235,8 @@ self.generator.run(
       def __init__(self, serving_type="local", api_key="", api_url="http://123.129.219.111:3000/v1/"):
           os.environ['DF_API_KEY'] = api_key
           self.storage = FileStorage(
-              first_entry_file_name="./dataflow/example/image_gen/image_edit/prompts.jsonl",
-              cache_path="./cache_local/multi2single_image_gen",
+              first_entry_file_name="../example_data/image_gen/image_edit/prompts.jsonl",
+              cache_path="./cache_local/image_editing",
               file_name_prefix="dataflow_cache_step",
               cache_type="jsonl"
           )
@@ -281,13 +265,11 @@ self.generator.run(
 - **Multi-image-input image-editing data synthesis pipeline**:
   ```python
   import os
-  from dataflow.operators.image_generation import PromptedT2ITextGenerator
+  import argparse
   from dataflow.operators.core_vision import PromptedImageGenerator
   from dataflow.operators.core_vision import PromptedImageEditGenerator
-  from dataflow.serving.api_llm_serving_request import APILLMServing_request
   from dataflow.serving.api_vlm_serving_openai import APIVLMServing_openai
   from dataflow.serving.local_image_gen_serving import LocalImageGenServing
-  from dataflow.prompts.image_gen_prompt_generator import MultiImagesToImagePromptGenerator
   from dataflow.utils.storage import FileStorage
   from dataflow.io import ImageIO

@@ -296,47 +278,32 @@ self.generator.run(
       def __init__(
           self,
           serving_type="api",
-          api_key="",
-          api_url="https://api.openai.com/v1/",
-          api_vlm_url="https://api.openai.com/v1/",
+          api_url="https://api.openai.com/v1/",
           ip_condition_num=1,
           repeat_times=1
       ):
           self.storage = FileStorage(
-              first_entry_file_name="./dataflow/example/image_gen/multi_image_input_gen/prompts.jsonl",
-              cache_path="./cache_local/multi_images_to_image_gen",
+              first_entry_file_name="../example_data/image_gen/multi_image_input_gen/prompts.jsonl",
+              cache_path="./cache_local/multi_subjects_driven_image_generation",
               file_name_prefix="dataflow_cache_step",
               cache_type="jsonl"
           )
-
-          os.environ["DF_API_KEY"] = api_key
-          self.serving = APILLMServing_request(
-              api_url=api_url,
-              model_name="gpt-4o",
-              max_workers=5,
-          )

           self.t2i_serving = LocalImageGenServing(
               image_io=ImageIO(save_path=os.path.join(self.storage.cache_path, "condition_images")),
-              batch_size=8,
-              hf_model_name_or_path="/ytech_m2v5_hdd/CheckPoints/FLUX.1-dev",
+              batch_size=4,
+              hf_model_name_or_path="/ytech_m2v5_hdd/CheckPoints/FLUX.1-dev",  # or "black-forest-labs/FLUX.1-dev"
               hf_cache_dir="./cache_local",
               hf_local_dir="./ckpt/models/"
           )

           self.vlm_serving = APIVLMServing_openai(
-              api_url=api_vlm_url,
-              model_name="gemini-2.5-flash-image-preview",
+              api_url=api_url,
+              model_name="gemini-2.5-flash-image-preview",  # try nano-banana
               image_io=ImageIO(save_path=os.path.join(self.storage.cache_path, "target_images")),
               # send_request_stream=True,  # if using http://123.129.219.111:3000/, add this line
           )

-          self.t2i_text_prompt_generator = MultiImagesToImagePromptGenerator()
-
-          self.text_to_image_sample_generator = PromptedT2ITextGenerator(
-              llm_serving=self.serving,
-          )
-
           self.text_to_image_generator = PromptedImageGenerator(
               t2i_serving=self.t2i_serving,
           )
@@ -346,15 +313,6 @@ self.generator.run(
           )

       def forward(self):
-          self.text_to_image_sample_generator.run(
-              storage=self.storage.step(),
-              prompt_generator=self.t2i_text_prompt_generator,
-              input_style_key="input_style",
-              input_prompt_key="input_text",
-              output_prompt_key="instruction",
-              output_prompt_key_2="output_img_discript",
-          )
-
           self.text_to_image_generator.run(
               storage=self.storage.step(),
               input_conversation_key="input_text",
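The rest of this example (the image-edit generation step and the script entry point) is cut off here by the diff. Purely as a hedged sketch of how such a pipeline script is typically driven, given the `import argparse` added above and the run commands shown earlier, the entry point might look roughly like the following; the class name `MultiSubjectsDrivenImageGenerationPipeline` is only a guess inferred from the cache path and is not confirmed by this diff:

```python
import argparse
import os

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--serving_type", default="api")
    parser.add_argument("--api_url", default="https://api.openai.com/v1/")
    args = parser.parse_args()

    # In API mode the key comes from the environment: export DF_API_KEY=<api_key>
    if args.serving_type == "api" and not os.environ.get("DF_API_KEY"):
        raise RuntimeError("export DF_API_KEY=<api_key> before running in API mode")

    # Hypothetical class name; the actual definition is elided in this diff.
    pipeline = MultiSubjectsDrivenImageGenerationPipeline(
        serving_type=args.serving_type,
        api_url=args.api_url,
    )
    pipeline.forward()
```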