@@ -408,6 +408,50 @@ class StableDiffusionGGML {
408408 apply_lora_immediately = false ;
409409 }
410410
411+ std::map<std::string, ggml_tensor*> mmap_able_tensors;
412+ bool enable_mmap_tensors = false ;
413+ bool main_backend_mmap = false ;
414+ if (sd_ctx_params->enable_mmap ) {
415+ if (apply_lora_immediately) {
416+ LOG_DEBUG (" cannot memory-map model weights: only supported with --lora-apply-mode at_runtime" );
417+ } else {
418+ enable_mmap_tensors = true ;
419+ if (offload_params_to_cpu) {
420+ main_backend_mmap = true ;
421+ } else {
422+ ggml_backend_dev_t dev = ggml_backend_get_device (backend);
423+ struct ggml_backend_dev_props props;
424+ ggml_backend_dev_get_props (dev, &props);
425+ main_backend_mmap = props.caps .buffer_from_host_ptr ;
426+ }
427+ }
428+ }
429+
430+ // split definition to avoid msvc choking on the extra parameter handling
431+ auto get_param_tensors_p = [&](auto && model, bool force_cpu, const char * prefix) {
432+ std::map<std::string, ggml_tensor*> temp;
433+ model->get_param_tensors (temp, prefix);
434+ bool do_mmap = enable_mmap_tensors && (main_backend_mmap || force_cpu);
435+ for (const auto & [key, tensor] : temp) {
436+ tensors[key] = tensor;
437+ if (do_mmap) {
438+ mmap_able_tensors[key] = tensor;
439+ }
440+ }
441+ };
442+
443+ auto get_param_tensors = [&](auto && model, bool force_cpu = false ) {
444+ std::map<std::string, ggml_tensor*> temp;
445+ model->get_param_tensors (temp);
446+ bool do_mmap = enable_mmap_tensors && (main_backend_mmap || force_cpu);
447+ for (const auto & [key, tensor] : temp) {
448+ tensors[key] = tensor;
449+ if (do_mmap) {
450+ mmap_able_tensors[key] = tensor;
451+ }
452+ }
453+ };
454+
411455 if (sd_version_is_control (version)) {
412456 // Might need vae encode for control cond
413457 vae_decode_only = false ;
@@ -514,7 +558,7 @@ class StableDiffusionGGML {
514558 clip_vision = std::make_shared<FrozenCLIPVisionEmbedder>(backend,
515559 offload_params_to_cpu,
516560 tensor_storage_map);
517- clip_vision-> get_param_tensors (tensors );
561+ get_param_tensors (clip_vision );
518562 }
519563 } else if (sd_version_is_qwen_image (version)) {
520564 bool enable_vision = false ;
@@ -580,16 +624,16 @@ class StableDiffusionGGML {
580624 }
581625 }
582626
583- cond_stage_model-> get_param_tensors (tensors );
627+ get_param_tensors (cond_stage_model, clip_on_cpu );
584628
585- diffusion_model-> get_param_tensors (tensors );
629+ get_param_tensors (diffusion_model );
586630
587631 if (sd_version_is_unet_edit (version)) {
588632 vae_decode_only = false ;
589633 }
590634
591635 if (high_noise_diffusion_model) {
592- high_noise_diffusion_model-> get_param_tensors (tensors );
636+ get_param_tensors (high_noise_diffusion_model );
593637 }
594638
595639 if (sd_ctx_params->keep_vae_on_cpu && !ggml_backend_is_cpu (backend)) {
@@ -652,6 +696,8 @@ class StableDiffusionGGML {
652696 }
653697 };
654698
699+ bool force_vae_cpu = sd_ctx_params->keep_vae_on_cpu ;
700+
655701 if (version == VERSION_CHROMA_RADIANCE) {
656702 LOG_INFO (" using FakeVAE" );
657703 first_stage_model = std::make_shared<FakeVAE>(version,
@@ -660,15 +706,15 @@ class StableDiffusionGGML {
660706 } else if (use_tae && !tae_preview_only) {
661707 LOG_INFO (" using TAE for encoding / decoding" );
662708 first_stage_model = create_tae ();
663- first_stage_model-> get_param_tensors (tensors , " tae" );
709+ get_param_tensors_p (first_stage_model, force_vae_cpu , " tae" );
664710 } else {
665711 LOG_INFO (" using VAE for encoding / decoding" );
666712 first_stage_model = create_vae ();
667- first_stage_model-> get_param_tensors (tensors , " first_stage_model" );
713+ get_param_tensors_p (first_stage_model, force_vae_cpu , " first_stage_model" );
668714 if (use_tae && tae_preview_only) {
669715 LOG_INFO (" using TAE for preview" );
670716 preview_vae = create_tae ();
671- preview_vae-> get_param_tensors (tensors , " tae" );
717+ get_param_tensors_p (preview_vae, force_vae_cpu , " tae" );
672718 }
673719 }
674720
@@ -733,7 +779,7 @@ class StableDiffusionGGML {
733779 }
734780 }
735781 if (use_pmid) {
736- pmid_model-> get_param_tensors (tensors , " pmid" );
782+ get_param_tensors_p (pmid_model, false , " pmid" );
737783 }
738784
739785 if (sd_ctx_params->flash_attn ) {
@@ -810,13 +856,11 @@ class StableDiffusionGGML {
810856 ignore_tensors.insert (" conditioner.embedders.3" );
811857 }
812858
813- if (sd_ctx_params->enable_mmap ) {
814- if (!(offload_params_to_cpu || ggml_backend_is_cpu (backend))) {
815- LOG_DEBUG (" cannot memory-map model weights: only supported with CPU or --offload-to-cpu" );
816- } else if (apply_lora_immediately) {
817- LOG_DEBUG (" cannot memory-map model weights: only supported with --lora-apply-mode at_runtime" );
859+ if (enable_mmap_tensors) {
860+ if (mmap_able_tensors.empty ()) {
861+ LOG_DEBUG (" no tensors could be memory-mapped" );
818862 } else {
819- mmap_tensor_store = model_loader.mmap_tensors (tensors , ignore_tensors);
863+ mmap_tensor_store = model_loader.mmap_tensors (mmap_able_tensors , ignore_tensors);
820864 }
821865 }
822866
0 commit comments