
Commit be6d9d9
upload image generation docs
1 parent 0050c91

File tree: 2 files changed, +124 −158 lines


docs/zh/notes/mm_guide/image_video_generation/image_editing.md

Lines changed: 27 additions & 69 deletions
@@ -40,6 +40,11 @@ cd ./Dataflow-MM
 conda create -n Dataflow-MM python=3.12
 pip install -e .
 ```
+Then run the initialization (**a very important step, be sure to execute it**):
+```bash
+dataflowmm init
+cd gpu_pipelines/
+```
 
 ### Step 2: Editing Data Preparation
 We use a `jsonl` file to record the data; below is a simple example of the input data:
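The example itself sits outside this hunk. For reference, a minimal input record consistent with the edited sample shown later in this diff might look as follows; the prompt text is illustrative, and the field names (`conversations`, `images`, `edited_images`) mirror the example further down:

```jsonl
{"conversations": [{"content": "The woman is dancing with the prince in a sacred ballroom.", "role": "user"}], "images": ["../example_data/image_gen/image_edit/image3.png"], "edited_images": [""]}
```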
@@ -62,40 +67,17 @@ storage = FileStorage(
 ### Step 3: Run the Pipeline
 You can run the image editing pipeline using the commands below.
 ```bash
-python /path/to/DataFlow-MM/test/test_image_editing.py --serving_type 'local'
+python image_editing_pipeline.py --serving_type 'local'
 ```
 
 ```bash
-python /path/to/DataFlow-MM/test/test_image_editing.py --api_key \<your_key\> --api_url \<your_url\>
+export DF_API_KEY=<api_key>
+python image_editing_pipeline.py --api_url \<your_url\>
 ```
 
 ## 3. Pipeline Logic
 ### 3.1 Prompt Design
-Initialize the prompt generator with an online LLM (e.g. gpt-4o):
-```python
-from dataflow.operators.image_generation import PromptedT2ITextGenerator
-
-os.environ["DF_API_KEY"] = api_key
-serving = APILLMServing_request(
-    api_url=api_url,
-    model_name="gpt-4o",
-    max_workers=5,
-)
-
-text_to_image_sample_generator = PromptedT2ITextGenerator(
-    llm_serving=serving,
-)
-```
-Run the prompt generator:
-```python
-text_to_image_sample_generator.run(
-    storage=storage.step(),
-    input_style_key = "input_style",
-    input_prompt_key = "input_text",
-    output_prompt_key = "instruction",
-    output_prompt_key_2 = "output_img_discript",
-)
-```
+Use a local or online LLM (e.g. gpt-4o) together with the configured data ratio to construct image generation samples.
 
 ### 3.2 Obtaining Image Conditions
 We use a text-to-image generation model; the text-to-image generator is initialized as follows:
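The initialization code itself is outside this hunk. As a quick reference, a minimal sketch consistent with the serving setup that appears later in this diff (the batch size and checkpoint are taken from the multi-image pipeline example; the save path and the Hugging Face model id are assumptions that may need adjusting for your environment):

```python
import os
from dataflow.serving.local_image_gen_serving import LocalImageGenServing
from dataflow.io import ImageIO

# Local text-to-image serving; the checkpoint can be a local path or a
# Hugging Face model id such as "black-forest-labs/FLUX.1-dev".
t2i_serving = LocalImageGenServing(
    image_io=ImageIO(save_path=os.path.join("./cache_local", "condition_images")),
    batch_size=4,
    hf_model_name_or_path="black-forest-labs/FLUX.1-dev",
    hf_cache_dir="./cache_local",
    hf_local_dir="./ckpt/models/",
)
```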
@@ -176,7 +158,7 @@ self.generator.run(
 ```jsonl
 {
     "conversations": [{"content": "The woman is dancing with the prince in a sacred ballroom.", "role": "user"}],
-    "images": ["./dataflow/example/test_image_editing/images/image3.png"],
+    "images": ["../example_data/image_gen/image_edit/image3.png"],
     "edited_images": [""]
 }
 ```
@@ -185,15 +167,17 @@ self.generator.run(
 Both local editing models and online editing models are supported for generation, and multiple images are also supported as input.
 - Image editing pipeline with a local editing model
 ```bash
-python /path/to/DataFlow-MM/test/test_image_editing.py --serving_type 'local'
+python /path/to/DataFlow-MM/test/image_editing_pipeline.py --serving_type 'local'
 ```
 - Image editing pipeline with an online editing model
 ```bash
-python /path/to/DataFlow-MM/test/test_image_editing.py --api_key <your_key> --api_url <your_url>
+export DF_API_KEY=<api_key>
+python /path/to/DataFlow-MM/test/image_editing_pipeline.py --api_url <your_url>
 ```
 - Image editing pipeline with multiple input images
 ```bash
-python /path/to/DataFlow-MM/test/test_multi_images_to_image_generation.py --api_key <your_key> --api_url <your_url>
+export DF_API_KEY=<api_key>
+python /path/to/DataFlow-MM/test/multi_images_to_image_generation_pipeline.py --api_url <your_url>
 ```
 
 ## 6. Pipeline Examples
@@ -208,8 +192,8 @@ self.generator.run(
 class ImageGenerationPipeline():
     def __init__(self, serving_type="local", api_key="", api_url="http://123.129.219.111:3000/v1/"):
         self.storage = FileStorage(
-            first_entry_file_name="./dataflow/example/image_gen/image_edit/prompts.jsonl",
-            cache_path="./cache_local/multi2single_image_gen",
+            first_entry_file_name="../example_data/image_gen/image_edit/prompts.jsonl",
+            cache_path="./cache_local/image_editing",
             file_name_prefix="dataflow_cache_step",
             cache_type="jsonl"
         )
@@ -251,8 +235,8 @@ self.generator.run(
     def __init__(self, serving_type="local", api_key="", api_url="http://123.129.219.111:3000/v1/"):
         os.environ['DF_API_KEY'] = api_key
         self.storage = FileStorage(
-            first_entry_file_name="./dataflow/example/image_gen/image_edit/prompts.jsonl",
-            cache_path="./cache_local/multi2single_image_gen",
+            first_entry_file_name="../example_data/image_gen/image_edit/prompts.jsonl",
+            cache_path="./cache_local/image_editing",
             file_name_prefix="dataflow_cache_step",
             cache_type="jsonl"
         )
@@ -281,13 +265,11 @@ self.generator.run(
 - **Multi-image-input image editing data synthesis pipeline**
 ```python
 import os
-from dataflow.operators.image_generation import PromptedT2ITextGenerator
+import argparse
 from dataflow.operators.core_vision import PromptedImageGenerator
 from dataflow.operators.core_vision import PromptedImageEditGenerator
-from dataflow.serving.api_llm_serving_request import APILLMServing_request
 from dataflow.serving.api_vlm_serving_openai import APIVLMServing_openai
 from dataflow.serving.local_image_gen_serving import LocalImageGenServing
-from dataflow.prompts.image_gen_prompt_generator import MultiImagesToImagePromptGenerator
 from dataflow.utils.storage import FileStorage
 from dataflow.io import ImageIO
 
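Since the rewritten example now imports `argparse`, the script's entry point presumably parses the flags used in the run commands above (`--serving_type`, `--api_url`). A minimal sketch of such an entry point; the exact argument set and defaults are assumptions, not taken from the actual script:

```python
import argparse

def parse_args():
    # Hypothetical argument parser mirroring the flags shown in the run commands;
    # defaults are illustrative only.
    parser = argparse.ArgumentParser(description="Multi-image-input image editing pipeline")
    parser.add_argument("--serving_type", type=str, default="api", choices=["local", "api"],
                        help="Use a local editing model or an online API model")
    parser.add_argument("--api_url", type=str, default="https://api.openai.com/v1/",
                        help="API endpoint used when serving_type is 'api'")
    return parser.parse_args()

if __name__ == "__main__":
    args = parse_args()
    # DF_API_KEY is expected to be exported beforehand: export DF_API_KEY=<api_key>
    print(args)
```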

@@ -296,47 +278,32 @@ self.generator.run(
     def __init__(
         self,
         serving_type="api",
-        api_key="",
-        api_url="https://api.openai.com/v1/",
-        api_vlm_url="https://api.openai.com/v1/",
+        api_url="https://api.openai.com/v1/",
         ip_condition_num=1,
         repeat_times=1
     ):
         self.storage = FileStorage(
-            first_entry_file_name="./dataflow/example/image_gen/multi_image_input_gen/prompts.jsonl",
-            cache_path="./cache_local/multi_images_to_image_gen",
+            first_entry_file_name="../example_data/image_gen/multi_image_input_gen/prompts.jsonl",
+            cache_path="./cache_local/multi_subjects_driven_image_generation",
             file_name_prefix="dataflow_cache_step",
             cache_type="jsonl"
         )
-
-        os.environ["DF_API_KEY"] = api_key
-        self.serving = APILLMServing_request(
-            api_url=api_url,
-            model_name="gpt-4o",
-            max_workers=5,
-        )
 
         self.t2i_serving = LocalImageGenServing(
             image_io=ImageIO(save_path=os.path.join(self.storage.cache_path, "condition_images")),
-            batch_size=8,
-            hf_model_name_or_path="/ytech_m2v5_hdd/CheckPoints/FLUX.1-dev",
+            batch_size=4,
+            hf_model_name_or_path="/ytech_m2v5_hdd/CheckPoints/FLUX.1-dev", # "black-forest-labs/FLUX.1-dev"
             hf_cache_dir="./cache_local",
             hf_local_dir="./ckpt/models/"
         )
 
         self.vlm_serving = APIVLMServing_openai(
-            api_url=api_vlm_url,
-            model_name="gemini-2.5-flash-image-preview",
+            api_url=api_url,
+            model_name="gemini-2.5-flash-image-preview", # try nano-banana
             image_io=ImageIO(save_path=os.path.join(self.storage.cache_path, "target_images")),
             # send_request_stream=True, # if use ip http://123.129.219.111:3000/ add this line
         )
 
-        self.t2i_text_prompt_generator = MultiImagesToImagePromptGenerator()
-
-        self.text_to_image_sample_generator = PromptedT2ITextGenerator(
-            llm_serving=self.serving,
-        )
-
         self.text_to_image_generator = PromptedImageGenerator(
             t2i_serving=self.t2i_serving,
         )
@@ -346,15 +313,6 @@ self.generator.run(
         )
 
     def forward(self):
-        self.text_to_image_sample_generator.run(
-            storage=self.storage.step(),
-            prompt_generator=self.t2i_text_prompt_generator,
-            input_style_key = "input_style",
-            input_prompt_key = "input_text",
-            output_prompt_key = "instruction",
-            output_prompt_key_2 = "output_img_discript",
-        )
-
 
         self.text_to_image_generator.run(
             storage=self.storage.step(),
             input_conversation_key="input_text",
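For context, the hunk above keeps only the text-to-image step in forward(); the subsequent editing step presumably invokes the PromptedImageEditGenerator instance constructed earlier in the class. A rough sketch of that continuation; the attribute name and keyword arguments below are illustrative assumptions, not the operator's documented signature:

```python
# Hypothetical continuation of forward(); attribute and key names are
# assumptions for illustration, not taken from the actual pipeline code.
self.image_edit_generator.run(
    storage=self.storage.step(),
    input_conversation_key="input_text",   # editing instruction text
    input_image_key="images",              # condition images produced above
    output_image_key="edited_images",      # where edited results are written
)
```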
