Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ mflux-generate-kontext = "mflux.models.flux.cli.flux_generate_kontext:main"
mflux-generate-qwen = "mflux.models.qwen.cli.qwen_image_generate:main"
mflux-generate-qwen-edit = "mflux.models.qwen.cli.qwen_image_edit_generate:main"
mflux-generate-fibo = "mflux.models.fibo.cli.fibo_generate:main"
mflux-generate-fibo-edit = "mflux.models.fibo.cli.fibo_edit_generate:main"
mflux-generate-z-image = "mflux.models.z_image.cli.z_image_generate:main"
mflux-generate-z-image-turbo = "mflux.models.z_image.cli.z_image_turbo_generate:main"
mflux-refine-fibo = "mflux.models.fibo_vlm.cli.fibo_refine:main"
Expand Down
2 changes: 2 additions & 0 deletions src/mflux/cli/defaults/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
"dev-krea",
"qwen",
"fibo",
"fibo-edit",
"z-image",
"z-image-turbo",
"flux2-klein-4b",
Expand All @@ -34,6 +35,7 @@
"qwen-image": 20,
"qwen-image-edit": 20,
"fibo": 20,
"fibo-edit": 20,
"z-image": 50,
"z-image-turbo": 9,
"flux2-klein-4b": 4,
Expand Down
23 changes: 20 additions & 3 deletions src/mflux/models/common/config/model_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,11 @@ def qwen_image_edit() -> "ModelConfig":
def fibo() -> "ModelConfig":
return AVAILABLE_MODELS["fibo"]

@staticmethod
@lru_cache
def fibo_edit() -> "ModelConfig":
return AVAILABLE_MODELS["fibo-edit"]

@staticmethod
@lru_cache
def z_image_turbo() -> "ModelConfig":
Expand Down Expand Up @@ -453,8 +458,20 @@ def from_name(
supports_guidance=True,
requires_sigma_shift=False,
),
"z-image": ModelConfig(
"fibo-edit": ModelConfig(
priority=18,
aliases=["fibo-edit", "fiboedit"],
model_name="briaai/Fibo-Edit",
base_model=None,
controlnet_model=None,
custom_transformer_model=None,
num_train_steps=1000,
max_sequence_length=512,
supports_guidance=True,
requires_sigma_shift=False,
),
"z-image": ModelConfig(
priority=19,
aliases=["z-image", "zimage"],
model_name="Tongyi-MAI/Z-Image",
base_model=None,
Expand All @@ -466,7 +483,7 @@ def from_name(
requires_sigma_shift=True,
),
"z-image-turbo": ModelConfig(
priority=19,
priority=20,
aliases=["z-image-turbo", "zimage-turbo"],
model_name="Tongyi-MAI/Z-Image-Turbo",
base_model=None,
Expand All @@ -478,7 +495,7 @@ def from_name(
requires_sigma_shift=True,
),
"seedvr2-3b": ModelConfig(
priority=20,
priority=21,
aliases=["seedvr2-3b", "seedvr2"],
model_name="numz/SeedVR2_comfyUI",
base_model=None,
Expand Down
31 changes: 30 additions & 1 deletion src/mflux/models/fibo/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ Most text-to-image models excel at imagination—but not control. FIBO is traine
- **Strong prompt adherence**: High alignment on PRISM-style evaluations
- **Enterprise-grade**: 100% licensed data with governance, repeatability, and legal clarity

## The three modes: Generate, Refine, and Inspire
## The four modes: Generate, Edit, Refine, and Inspire

### Generate
While the actual prompt input to FIBO is a structured JSON file, the generate command provides an interface to input pure text prompts. These are then expanded into structured JSON prompts using FIBO's Vision-Language Model (VLM) before being passed to the diffusion model for image generation.
Expand Down Expand Up @@ -259,6 +259,35 @@ image.save("owl_white.png")

It is worth noting that refine does not work the same way as other editing techniques like Flux Kontext or Qwen Image Edit. Instead of modifying an existing image, it modifies the underlying **structured prompt** to produce a new image.

### Edit
FIBO Edit supports direct image-conditioned editing using a structured JSON prompt that includes an `edit_instruction` field.

```sh
mflux-generate-fibo-edit \
--image-path owl_original.png \
--prompt-file owl_brown.json \
--edit-instruction "Make the owl white and add round glasses while keeping composition unchanged." \
--width 1024 \
--height 560 \
--steps 20 \
--guidance 4.0 \
--seed 42 \
--output owl_white_edit.png
```

Optional localized editing is supported with a mask:

```sh
mflux-generate-fibo-edit \
--image-path owl_original.png \
--mask-path owl_mask.png \
--prompt-file owl_brown.json \
--edit-instruction "Replace only the owl with a white owl wearing glasses." \
--steps 20 \
--seed 42 \
--output owl_masked_edit.png
```

### Inspire
Provide an image instead of text. FIBO's vision-language model extracts a detailed, structured prompt, blends it with your creative intent, and produces related images—ideal for inspiration without overreliance on the original.

Expand Down
79 changes: 79 additions & 0 deletions src/mflux/models/fibo/cli/fibo_edit_generate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
from pathlib import Path

from mflux.callbacks.callback_manager import CallbackManager
from mflux.cli.defaults import defaults as ui_defaults
from mflux.cli.parser.parsers import CommandLineParser
from mflux.models.fibo.latent_creator.fibo_latent_creator import FiboLatentCreator
from mflux.models.fibo.variants.edit.fibo_edit import FIBOEdit
from mflux.models.fibo.variants.edit.util import FiboEditUtil
from mflux.models.fibo.variants.txt2img.util import FiboUtil
from mflux.utils.dimension_resolver import DimensionResolver
from mflux.utils.exceptions import PromptFileReadError, StopImageGenerationException
from mflux.utils.prompt_util import PromptUtil


def main():
    """CLI entry point: generate an edited image with Bria FIBO Edit."""
    # Assemble the argument parser (flag names and help text match the other mflux CLIs).
    arg_parser = CommandLineParser(description="Generate an edited image using Bria FIBO Edit.")
    arg_parser.add_general_arguments()
    arg_parser.add_model_arguments(require_model_arg=False)
    arg_parser.add_lora_arguments()
    arg_parser.add_image_generator_arguments(supports_metadata_config=True, supports_dimension_scale_factor=True)
    arg_parser.add_argument("--image-path", type=Path, required=True, help="Local path to source image for editing.")
    arg_parser.add_argument("--mask-path", type=Path, default=None, help="Optional mask image path for localized edits.")
    arg_parser.add_argument(
        "--edit-instruction",
        type=str,
        default=None,
        help="Optional edit instruction. Used when prompt JSON does not already include `edit_instruction`.",
    )
    arg_parser.add_output_arguments()
    args = arg_parser.parse_args()

    # Fall back to the shared UI default guidance scale when none was supplied.
    args.guidance = ui_defaults.GUIDANCE_SCALE if args.guidance is None else args.guidance

    # Build the structured JSON prompt, then make sure it carries an edit instruction.
    structured_prompt = FiboEditUtil.ensure_edit_instruction(
        FiboUtil.get_json_prompt(args, quantize=args.quantize),
        edit_instruction=args.edit_instruction,
    )

    # Load the FIBO Edit model (optionally quantized and/or with LoRA weights).
    model = FIBOEdit(
        quantize=args.quantize,
        model_path=args.model_path,
        lora_paths=args.lora_paths,
        lora_scales=args.lora_scales,
    )

    saver = CallbackManager.register_callbacks(
        args=args,
        model=model,
        latent_creator=FiboLatentCreator,
    )

    try:
        # Resolve output dimensions, using the source image as the reference.
        width, height = DimensionResolver.resolve(
            width=args.width,
            height=args.height,
            reference_image_path=args.image_path,
        )
        # One generated image per requested seed.
        for seed in args.seed:
            image = model.generate_image(
                seed=seed,
                prompt=structured_prompt,
                image_path=args.image_path,
                mask_path=args.mask_path,
                width=width,
                height=height,
                guidance=args.guidance,
                num_inference_steps=args.steps,
                scheduler="flow_match_euler_discrete",
                negative_prompt=PromptUtil.read_negative_prompt(args),
            )
            image.save(path=args.output.format(seed=seed), export_json_metadata=args.metadata)
    except (StopImageGenerationException, PromptFileReadError, ValueError) as error:
        # Expected interruptions and bad input: report, then fall through to stats.
        print(error)
    finally:
        if saver:
            print(saver.memory_stats())


# Allow running this module directly as a script (same entry point as the
# `mflux-generate-fibo-edit` console script declared in pyproject.toml).
if __name__ == "__main__":
    main()
25 changes: 23 additions & 2 deletions src/mflux/models/fibo/model/fibo_transformer/transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,20 +35,29 @@ def __call__(
hidden_states: mx.array,
encoder_hidden_states: mx.array,
text_encoder_layers: list[mx.array],
conditioning_seq_len: int = 0,
conditioning_image_ids: mx.array | None = None,
) -> mx.array:
# 1. Create embeddings
hidden_states = FiboTransformer._handle_classifier_free_guidance(hidden_states, encoder_hidden_states)
hidden_states = self.x_embedder(hidden_states)
encoder_hidden_states = self.context_embedder(encoder_hidden_states)
time_embeddings = FiboTransformer._compute_time_embeddings(t, config, hidden_states.shape[0], hidden_states.dtype, self.time_embed) # fmt: off
image_rotary_emb = FiboTransformer._compute_rotary_embeddings(encoder_hidden_states, self.pos_embed, config, hidden_states.dtype) # fmt: off
image_rotary_emb = FiboTransformer._compute_rotary_embeddings(
encoder_hidden_states=encoder_hidden_states,
pos_embed=self.pos_embed,
config=config,
dtype=hidden_states.dtype,
conditioning_image_ids=conditioning_image_ids,
)

# 2. Compute attention mask
attention_mask = FiboTransformer._compute_attention_mask(
config=config,
batch_size=hidden_states.shape[0],
encoder_hidden_states=encoder_hidden_states,
max_tokens=encoder_hidden_states.shape[1],
conditioning_seq_len=conditioning_seq_len,
)

# 3. Project the fibo-specific text encoder layers
Expand Down Expand Up @@ -165,10 +174,15 @@ def _compute_rotary_embeddings(
pos_embed: FiboEmbedND,
config: Config,
dtype: mx.Dtype,
conditioning_image_ids: mx.array | None = None,
) -> mx.array:
max_tokens = encoder_hidden_states.shape[1]
txt_ids = mx.zeros((max_tokens, 3), dtype=dtype)
img_ids = FiboTransformer._prepare_latent_image_ids(height=config.height, width=config.width, dtype=dtype)
if conditioning_image_ids is not None:
if conditioning_image_ids.ndim == 3:
conditioning_image_ids = conditioning_image_ids[0]
img_ids = mx.concatenate([img_ids, conditioning_image_ids.astype(dtype)], axis=0)

if txt_ids.ndim == 3 and txt_ids.shape[0] == 1:
txt_ids = txt_ids[0]
Expand Down Expand Up @@ -212,14 +226,21 @@ def _compute_attention_mask(
config: Config,
encoder_hidden_states: mx.array,
max_tokens: int,
conditioning_seq_len: int = 0,
) -> mx.array:
vae_scale_factor = 16
latent_height = config.height // vae_scale_factor
latent_width = config.width // vae_scale_factor
latent_seq_len = latent_height * latent_width
prompt_attention_mask = mx.ones((batch_size, max_tokens), dtype=mx.float32)
latent_attention_mask = mx.ones((batch_size, latent_seq_len), dtype=mx.float32)
attention_mask_2d = mx.concatenate([prompt_attention_mask, latent_attention_mask], axis=1)
if conditioning_seq_len > 0:
conditioning_attention_mask = mx.ones((batch_size, conditioning_seq_len), dtype=mx.float32)
attention_mask_2d = mx.concatenate(
[prompt_attention_mask, latent_attention_mask, conditioning_attention_mask], axis=1
)
else:
attention_mask_2d = mx.concatenate([prompt_attention_mask, latent_attention_mask], axis=1)
attention_mask = FiboTransformer._prepare_attention_mask(attention_mask_2d)
attention_mask = attention_mask.astype(encoder_hidden_states.dtype)
return attention_mask
3 changes: 3 additions & 0 deletions src/mflux/models/fibo/variants/edit/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"""Public interface of the FIBO Edit variant package: re-exports FIBOEdit."""

from .fibo_edit import FIBOEdit

__all__ = ["FIBOEdit"]
Loading