Skip to content

Commit fc57e4c

Browse files
author
Cursor Assistant
committed
Fibo Edit
- Fibo Edit (58e1079a) - remove (cc23aeb0)
1 parent 69069e8 commit fc57e4c

File tree

10 files changed

+431
-6
lines changed

10 files changed

+431
-6
lines changed

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -87,6 +87,7 @@ mflux-generate-kontext = "mflux.models.flux.cli.flux_generate_kontext:main"
8787
mflux-generate-qwen = "mflux.models.qwen.cli.qwen_image_generate:main"
8888
mflux-generate-qwen-edit = "mflux.models.qwen.cli.qwen_image_edit_generate:main"
8989
mflux-generate-fibo = "mflux.models.fibo.cli.fibo_generate:main"
90+
mflux-generate-fibo-edit = "mflux.models.fibo.cli.fibo_edit_generate:main"
9091
mflux-generate-z-image = "mflux.models.z_image.cli.z_image_generate:main"
9192
mflux-generate-z-image-turbo = "mflux.models.z_image.cli.z_image_turbo_generate:main"
9293
mflux-refine-fibo = "mflux.models.fibo_vlm.cli.fibo_refine:main"

src/mflux/cli/defaults/defaults.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@
1919
"dev-krea",
2020
"qwen",
2121
"fibo",
22+
"fibo-edit",
2223
"z-image",
2324
"z-image-turbo",
2425
"flux2-klein-4b",
@@ -34,6 +35,7 @@
3435
"qwen-image": 20,
3536
"qwen-image-edit": 20,
3637
"fibo": 20,
38+
"fibo-edit": 20,
3739
"z-image": 50,
3840
"z-image-turbo": 9,
3941
"flux2-klein-4b": 4,

src/mflux/models/common/config/model_config.py

Lines changed: 20 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -127,6 +127,11 @@ def qwen_image_edit() -> "ModelConfig":
127127
def fibo() -> "ModelConfig":
128128
return AVAILABLE_MODELS["fibo"]
129129

130+
@staticmethod
131+
@lru_cache
132+
def fibo_edit() -> "ModelConfig":
133+
return AVAILABLE_MODELS["fibo-edit"]
134+
130135
@staticmethod
131136
@lru_cache
132137
def z_image_turbo() -> "ModelConfig":
@@ -432,8 +437,20 @@ def from_name(
432437
supports_guidance=True,
433438
requires_sigma_shift=False,
434439
),
435-
"z-image": ModelConfig(
440+
"fibo-edit": ModelConfig(
436441
priority=18,
442+
aliases=["fibo-edit", "fiboedit"],
443+
model_name="briaai/Fibo-Edit",
444+
base_model=None,
445+
controlnet_model=None,
446+
custom_transformer_model=None,
447+
num_train_steps=1000,
448+
max_sequence_length=512,
449+
supports_guidance=True,
450+
requires_sigma_shift=False,
451+
),
452+
"z-image": ModelConfig(
453+
priority=19,
437454
aliases=["z-image", "zimage"],
438455
model_name="Tongyi-MAI/Z-Image",
439456
base_model=None,
@@ -445,7 +462,7 @@ def from_name(
445462
requires_sigma_shift=True,
446463
),
447464
"z-image-turbo": ModelConfig(
448-
priority=19,
465+
priority=20,
449466
aliases=["z-image-turbo", "zimage-turbo"],
450467
model_name="Tongyi-MAI/Z-Image-Turbo",
451468
base_model=None,
@@ -457,7 +474,7 @@ def from_name(
457474
requires_sigma_shift=True,
458475
),
459476
"seedvr2-3b": ModelConfig(
460-
priority=20,
477+
priority=21,
461478
aliases=["seedvr2-3b", "seedvr2"],
462479
model_name="numz/SeedVR2_comfyUI",
463480
base_model=None,

src/mflux/models/fibo/README.md

Lines changed: 30 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@ Most text-to-image models excel at imagination—but not control. FIBO is traine
1313
- **Strong prompt adherence**: High alignment on PRISM-style evaluations
1414
- **Enterprise-grade**: 100% licensed data with governance, repeatability, and legal clarity
1515

16-
## The three modes: Generate, Refine, and Inspire
16+
## The four modes: Generate, Edit, Refine, and Inspire
1717

1818
### Generate
1919
While the actual prompt input to FIBO is a structured JSON file, the generate command provides an interface to input pure text prompts. These are then expanded into structured JSON prompts using FIBO's Vision-Language Model (VLM) before being passed to the diffusion model for image generation.
@@ -259,6 +259,35 @@ image.save("owl_white.png")
259259

260260
It is worth noting that refine does not work the same way as other editing techniques like Flux Kontext or Qwen Image Edit. Instead of modifying an existing image, it modifies the underlying **structured prompt** to produce a new image.
261261

262+
### Edit
263+
FIBO Edit supports direct image-conditioned editing using a structured JSON prompt that includes an `edit_instruction` field.
264+
265+
```sh
266+
mflux-generate-fibo-edit \
267+
--image-path owl_original.png \
268+
--prompt-file owl_brown.json \
269+
--edit-instruction "Make the owl white and add round glasses while keeping composition unchanged." \
270+
--width 1024 \
271+
--height 560 \
272+
--steps 20 \
273+
--guidance 4.0 \
274+
--seed 42 \
275+
--output owl_white_edit.png
276+
```
277+
278+
Optional localized editing is supported with a mask:
279+
280+
```sh
281+
mflux-generate-fibo-edit \
282+
--image-path owl_original.png \
283+
--mask-path owl_mask.png \
284+
--prompt-file owl_brown.json \
285+
--edit-instruction "Replace only the owl with a white owl wearing glasses." \
286+
--steps 20 \
287+
--seed 42 \
288+
--output owl_masked_edit.png
289+
```
290+
262291
### Inspire
263292
Provide an image instead of text. FIBO's vision-language model extracts a detailed, structured prompt, blends it with your creative intent, and produces related images—ideal for inspiration without overreliance on the original.
264293

src/mflux/models/fibo/cli/fibo_edit_generate.py

Lines changed: 79 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,79 @@
1+
from pathlib import Path
2+
3+
from mflux.callbacks.callback_manager import CallbackManager
4+
from mflux.cli.defaults import defaults as ui_defaults
5+
from mflux.cli.parser.parsers import CommandLineParser
6+
from mflux.models.fibo.latent_creator.fibo_latent_creator import FiboLatentCreator
7+
from mflux.models.fibo.variants.edit.fibo_edit import FIBOEdit
8+
from mflux.models.fibo.variants.edit.util import FiboEditUtil
9+
from mflux.models.fibo.variants.txt2img.util import FiboUtil
10+
from mflux.utils.dimension_resolver import DimensionResolver
11+
from mflux.utils.exceptions import PromptFileReadError, StopImageGenerationException
12+
from mflux.utils.prompt_util import PromptUtil
13+
14+
15+
def main():
16+
parser = CommandLineParser(description="Generate an edited image using Bria FIBO Edit.")
17+
parser.add_general_arguments()
18+
parser.add_model_arguments(require_model_arg=False)
19+
parser.add_lora_arguments()
20+
parser.add_image_generator_arguments(supports_metadata_config=True, supports_dimension_scale_factor=True)
21+
parser.add_argument("--image-path", type=Path, required=True, help="Local path to source image for editing.")
22+
parser.add_argument("--mask-path", type=Path, default=None, help="Optional mask image path for localized edits.")
23+
parser.add_argument(
24+
"--edit-instruction",
25+
type=str,
26+
default=None,
27+
help="Optional edit instruction. Used when prompt JSON does not already include `edit_instruction`.",
28+
)
29+
parser.add_output_arguments()
30+
args = parser.parse_args()
31+
32+
if args.guidance is None:
33+
args.guidance = ui_defaults.GUIDANCE_SCALE
34+
35+
json_prompt = FiboUtil.get_json_prompt(args, quantize=args.quantize)
36+
json_prompt = FiboEditUtil.ensure_edit_instruction(json_prompt, edit_instruction=args.edit_instruction)
37+
38+
fibo_edit = FIBOEdit(
39+
quantize=args.quantize,
40+
model_path=args.model_path,
41+
lora_paths=args.lora_paths,
42+
lora_scales=args.lora_scales,
43+
)
44+
45+
memory_saver = CallbackManager.register_callbacks(
46+
args=args,
47+
model=fibo_edit,
48+
latent_creator=FiboLatentCreator,
49+
)
50+
51+
try:
52+
width, height = DimensionResolver.resolve(
53+
width=args.width,
54+
height=args.height,
55+
reference_image_path=args.image_path,
56+
)
57+
for seed in args.seed:
58+
image = fibo_edit.generate_image(
59+
seed=seed,
60+
prompt=json_prompt,
61+
image_path=args.image_path,
62+
mask_path=args.mask_path,
63+
width=width,
64+
height=height,
65+
guidance=args.guidance,
66+
num_inference_steps=args.steps,
67+
scheduler="flow_match_euler_discrete",
68+
negative_prompt=PromptUtil.read_negative_prompt(args),
69+
)
70+
image.save(path=args.output.format(seed=seed), export_json_metadata=args.metadata)
71+
except (StopImageGenerationException, PromptFileReadError, ValueError) as exc:
72+
print(exc)
73+
finally:
74+
if memory_saver:
75+
print(memory_saver.memory_stats())
76+
77+
78+
if __name__ == "__main__":
79+
main()

src/mflux/models/fibo/model/fibo_transformer/transformer.py

Lines changed: 23 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -35,20 +35,29 @@ def __call__(
3535
hidden_states: mx.array,
3636
encoder_hidden_states: mx.array,
3737
text_encoder_layers: list[mx.array],
38+
conditioning_seq_len: int = 0,
39+
conditioning_image_ids: mx.array | None = None,
3840
) -> mx.array:
3941
# 1. Create embeddings
4042
hidden_states = FiboTransformer._handle_classifier_free_guidance(hidden_states, encoder_hidden_states)
4143
hidden_states = self.x_embedder(hidden_states)
4244
encoder_hidden_states = self.context_embedder(encoder_hidden_states)
4345
time_embeddings = FiboTransformer._compute_time_embeddings(t, config, hidden_states.shape[0], hidden_states.dtype, self.time_embed) # fmt: off
44-
image_rotary_emb = FiboTransformer._compute_rotary_embeddings(encoder_hidden_states, self.pos_embed, config, hidden_states.dtype) # fmt: off
46+
image_rotary_emb = FiboTransformer._compute_rotary_embeddings(
47+
encoder_hidden_states=encoder_hidden_states,
48+
pos_embed=self.pos_embed,
49+
config=config,
50+
dtype=hidden_states.dtype,
51+
conditioning_image_ids=conditioning_image_ids,
52+
)
4553

4654
# 2. Compute attention mask
4755
attention_mask = FiboTransformer._compute_attention_mask(
4856
config=config,
4957
batch_size=hidden_states.shape[0],
5058
encoder_hidden_states=encoder_hidden_states,
5159
max_tokens=encoder_hidden_states.shape[1],
60+
conditioning_seq_len=conditioning_seq_len,
5261
)
5362

5463
# 3. Project the fibo-specific text encoder layers
@@ -165,10 +174,15 @@ def _compute_rotary_embeddings(
165174
pos_embed: FiboEmbedND,
166175
config: Config,
167176
dtype: mx.Dtype,
177+
conditioning_image_ids: mx.array | None = None,
168178
) -> mx.array:
169179
max_tokens = encoder_hidden_states.shape[1]
170180
txt_ids = mx.zeros((max_tokens, 3), dtype=dtype)
171181
img_ids = FiboTransformer._prepare_latent_image_ids(height=config.height, width=config.width, dtype=dtype)
182+
if conditioning_image_ids is not None:
183+
if conditioning_image_ids.ndim == 3:
184+
conditioning_image_ids = conditioning_image_ids[0]
185+
img_ids = mx.concatenate([img_ids, conditioning_image_ids.astype(dtype)], axis=0)
172186

173187
if txt_ids.ndim == 3 and txt_ids.shape[0] == 1:
174188
txt_ids = txt_ids[0]
@@ -212,14 +226,21 @@ def _compute_attention_mask(
212226
config: Config,
213227
encoder_hidden_states: mx.array,
214228
max_tokens: int,
229+
conditioning_seq_len: int = 0,
215230
) -> mx.array:
216231
vae_scale_factor = 16
217232
latent_height = config.height // vae_scale_factor
218233
latent_width = config.width // vae_scale_factor
219234
latent_seq_len = latent_height * latent_width
220235
prompt_attention_mask = mx.ones((batch_size, max_tokens), dtype=mx.float32)
221236
latent_attention_mask = mx.ones((batch_size, latent_seq_len), dtype=mx.float32)
222-
attention_mask_2d = mx.concatenate([prompt_attention_mask, latent_attention_mask], axis=1)
237+
if conditioning_seq_len > 0:
238+
conditioning_attention_mask = mx.ones((batch_size, conditioning_seq_len), dtype=mx.float32)
239+
attention_mask_2d = mx.concatenate(
240+
[prompt_attention_mask, latent_attention_mask, conditioning_attention_mask], axis=1
241+
)
242+
else:
243+
attention_mask_2d = mx.concatenate([prompt_attention_mask, latent_attention_mask], axis=1)
223244
attention_mask = FiboTransformer._prepare_attention_mask(attention_mask_2d)
224245
attention_mask = attention_mask.astype(encoder_hidden_states.dtype)
225246
return attention_mask

src/mflux/models/fibo/variants/edit/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
from .fibo_edit import FIBOEdit
2+
3+
__all__ = ["FIBOEdit"]

0 commit comments

Comments
 (0)