Skip to content

Commit e6e02ec

Browse files
authored
[diffusion]: Add model detectors and warning for quantized diffusion models (#18041)
1 parent 145ae51 commit e6e02ec

File tree

1 file changed

+26
-0
lines changed

1 file changed

+26
-0
lines changed

python/sglang/multimodal_gen/registry.py

Lines changed: 26 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -390,6 +390,17 @@ def get_model_info(
390390
# 1. Discover all available pipeline classes and cache them
391391
_discover_and_register_pipelines()
392392

393+
# Detect quantized models and fall back to the diffusers backend
394+
is_quantized = any(q in model_path.lower() for q in ["-4bit", "-awq", "-gptq"])
395+
if is_quantized and backend != Backend.DIFFUSERS:
396+
logger.info(
397+
"Detected a quantized model format ('%s'). "
398+
"The native sglang-diffusion engine currently only supports BF16/FP16. "
399+
"Falling back to diffusers backend.",
400+
model_path,
401+
)
402+
return _get_diffusers_model_info(model_path)
403+
393404
# 2. Get pipeline class - check non-diffusers models first
394405
pipeline_class_name = get_non_diffusers_pipeline_name(model_path)
395406
if pipeline_class_name:
@@ -656,34 +667,49 @@ def _register_configs():
656667
sampling_param_cls=QwenImageSamplingParams,
657668
pipeline_config_cls=QwenImagePipelineConfig,
658669
hf_model_paths=["Qwen/Qwen-Image"],
670+
model_detectors=[
671+
lambda hf_id: "qwen-image" in hf_id.lower()
672+
and "edit" not in hf_id.lower()
673+
and "layered" not in hf_id.lower()
674+
and "2512" not in hf_id.lower()
675+
],
659676
)
660677
register_configs(
661678
sampling_param_cls=QwenImage2512SamplingParams,
662679
pipeline_config_cls=QwenImagePipelineConfig,
663680
hf_model_paths=["Qwen/Qwen-Image-2512"],
681+
model_detectors=[lambda hf_id: "qwen-image-2512" in hf_id.lower()],
664682
)
665683
register_configs(
666684
sampling_param_cls=QwenImageSamplingParams,
667685
pipeline_config_cls=QwenImageEditPipelineConfig,
668686
hf_model_paths=["Qwen/Qwen-Image-Edit"],
687+
model_detectors=[
688+
lambda hf_id: "qwen-image-edit" in hf_id.lower()
689+
and "2509" not in hf_id.lower()
690+
and "2511" not in hf_id.lower()
691+
],
669692
)
670693

671694
register_configs(
672695
sampling_param_cls=QwenImageEditPlusSamplingParams,
673696
pipeline_config_cls=QwenImageEditPlusPipelineConfig,
674697
hf_model_paths=["Qwen/Qwen-Image-Edit-2509"],
698+
model_detectors=[lambda hf_id: "qwen-image-edit-2509" in hf_id.lower()],
675699
)
676700

677701
register_configs(
678702
sampling_param_cls=QwenImageEditPlusSamplingParams,
679703
pipeline_config_cls=QwenImageEditPlus_2511_PipelineConfig,
680704
hf_model_paths=["Qwen/Qwen-Image-Edit-2511"],
705+
model_detectors=[lambda hf_id: "qwen-image-edit-2511" in hf_id.lower()],
681706
)
682707

683708
register_configs(
684709
sampling_param_cls=QwenImageLayeredSamplingParams,
685710
pipeline_config_cls=QwenImageLayeredPipelineConfig,
686711
hf_model_paths=["Qwen/Qwen-Image-Layered"],
712+
model_detectors=[lambda hf_id: "qwen-image-layered" in hf_id.lower()],
687713
)
688714

689715
register_configs(

0 commit comments

Comments
 (0)