@@ -411,6 +411,50 @@ class StableDiffusionGGML {
411411 apply_lora_immediately = false ;
412412 }
413413
414+ std::map<std::string, ggml_tensor*> mmap_able_tensors;
415+ bool enable_mmap_tensors = false ;
416+ bool main_backend_mmap = false ;
417+ if (sd_ctx_params->enable_mmap ) {
418+ if (apply_lora_immediately) {
419+ LOG_DEBUG (" cannot memory-map model weights: only supported with --lora-apply-mode at_runtime" );
420+ } else {
421+ enable_mmap_tensors = true ;
422+ if (offload_params_to_cpu) {
423+ main_backend_mmap = true ;
424+ } else {
425+ ggml_backend_dev_t dev = ggml_backend_get_device (backend);
426+ struct ggml_backend_dev_props props;
427+ ggml_backend_dev_get_props (dev, &props);
428+ main_backend_mmap = props.caps .buffer_from_host_ptr ;
429+ }
430+ }
431+ }
432+
433+ // split definition to avoid msvc choking on the extra parameter handling
434+ auto get_param_tensors_p = [&](auto && model, bool force_cpu, const char * prefix) {
435+ std::map<std::string, ggml_tensor*> temp;
436+ model->get_param_tensors (temp, prefix);
437+ bool do_mmap = enable_mmap_tensors && (main_backend_mmap || force_cpu);
438+ for (const auto & [key, tensor] : temp) {
439+ tensors[key] = tensor;
440+ if (do_mmap) {
441+ mmap_able_tensors[key] = tensor;
442+ }
443+ }
444+ };
445+
446+ auto get_param_tensors = [&](auto && model, bool force_cpu = false ) {
447+ std::map<std::string, ggml_tensor*> temp;
448+ model->get_param_tensors (temp);
449+ bool do_mmap = enable_mmap_tensors && (main_backend_mmap || force_cpu);
450+ for (const auto & [key, tensor] : temp) {
451+ tensors[key] = tensor;
452+ if (do_mmap) {
453+ mmap_able_tensors[key] = tensor;
454+ }
455+ }
456+ };
457+
414458 if (sd_version_is_control (version)) {
415459 // Might need vae encode for control cond
416460 vae_decode_only = false ;
@@ -517,7 +561,7 @@ class StableDiffusionGGML {
517561 clip_vision = std::make_shared<FrozenCLIPVisionEmbedder>(backend,
518562 offload_params_to_cpu,
519563 tensor_storage_map);
520- clip_vision-> get_param_tensors (tensors );
564+ get_param_tensors (clip_vision );
521565 }
522566 } else if (sd_version_is_qwen_image (version)) {
523567 bool enable_vision = false ;
@@ -592,16 +636,16 @@ class StableDiffusionGGML {
592636 }
593637 }
594638
595- cond_stage_model-> get_param_tensors (tensors );
639+ get_param_tensors (cond_stage_model, clip_on_cpu );
596640
597- diffusion_model-> get_param_tensors (tensors );
641+ get_param_tensors (diffusion_model );
598642
599643 if (sd_version_is_unet_edit (version)) {
600644 vae_decode_only = false ;
601645 }
602646
603647 if (high_noise_diffusion_model) {
604- high_noise_diffusion_model-> get_param_tensors (tensors );
648+ get_param_tensors (high_noise_diffusion_model );
605649 }
606650
607651 if (sd_ctx_params->keep_vae_on_cpu && !ggml_backend_is_cpu (backend)) {
@@ -664,6 +708,8 @@ class StableDiffusionGGML {
664708 }
665709 };
666710
711+ bool force_vae_cpu = sd_ctx_params->keep_vae_on_cpu ;
712+
667713 if (version == VERSION_CHROMA_RADIANCE) {
668714 LOG_INFO (" using FakeVAE" );
669715 first_stage_model = std::make_shared<FakeVAE>(version,
@@ -672,15 +718,15 @@ class StableDiffusionGGML {
672718 } else if (use_tae && !tae_preview_only) {
673719 LOG_INFO (" using TAE for encoding / decoding" );
674720 first_stage_model = create_tae ();
675- first_stage_model-> get_param_tensors (tensors , " tae" );
721+ get_param_tensors_p (first_stage_model, force_vae_cpu , " tae" );
676722 } else {
677723 LOG_INFO (" using VAE for encoding / decoding" );
678724 first_stage_model = create_vae ();
679- first_stage_model-> get_param_tensors (tensors , " first_stage_model" );
725+ get_param_tensors_p (first_stage_model, force_vae_cpu , " first_stage_model" );
680726 if (use_tae && tae_preview_only) {
681727 LOG_INFO (" using TAE for preview" );
682728 preview_vae = create_tae ();
683- preview_vae-> get_param_tensors (tensors , " tae" );
729+ get_param_tensors_p (first_stage_model, force_vae_cpu , " tae" );
684730 }
685731 }
686732
@@ -745,7 +791,7 @@ class StableDiffusionGGML {
745791 }
746792 }
747793 if (use_pmid) {
748- pmid_model-> get_param_tensors (tensors , " pmid" );
794+ get_param_tensors_p (pmid_model, false , " pmid" );
749795 }
750796
751797 if (sd_ctx_params->flash_attn ) {
@@ -826,13 +872,11 @@ class StableDiffusionGGML {
826872 ignore_tensors.insert (" text_encoders.llm.multi_modal_projector." );
827873 }
828874
829- if (sd_ctx_params->enable_mmap ) {
830- if (!(offload_params_to_cpu || ggml_backend_is_cpu (backend))) {
831- LOG_DEBUG (" cannot memory-map model weights: only supported with CPU or --offload-to-cpu" );
832- } else if (apply_lora_immediately) {
833- LOG_DEBUG (" cannot memory-map model weights: only supported with --lora-apply-mode at_runtime" );
875+ if (enable_mmap_tensors) {
876+ if (mmap_able_tensors.empty ()) {
877+ LOG_DEBUG (" no tensors could be memory-mapped" );
834878 } else {
835- mmap_tensor_store = model_loader.mmap_tensors (tensors , ignore_tensors);
879+ mmap_tensor_store = model_loader.mmap_tensors (mmap_able_tensors , ignore_tensors);
836880 }
837881 }
838882
0 commit comments