Skip to content

Commit 49115cb

Browse files
committed
feat: enable memory-mapped tensors for all compatible backends
1 parent 6e4f647 commit 49115cb

1 file changed

Lines changed: 58 additions & 14 deletions

File tree

src/stable-diffusion.cpp

Lines changed: 58 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -408,6 +408,50 @@ class StableDiffusionGGML {
408408
apply_lora_immediately = false;
409409
}
410410

411+
std::map<std::string, ggml_tensor*> mmap_able_tensors;
412+
bool enable_mmap_tensors = false;
413+
bool main_backend_mmap = false;
414+
if (sd_ctx_params->enable_mmap) {
415+
if (apply_lora_immediately) {
416+
LOG_DEBUG("cannot memory-map model weights: only supported with --lora-apply-mode at_runtime");
417+
} else {
418+
enable_mmap_tensors = true;
419+
if (offload_params_to_cpu) {
420+
main_backend_mmap = true;
421+
} else {
422+
ggml_backend_dev_t dev = ggml_backend_get_device(backend);
423+
struct ggml_backend_dev_props props;
424+
ggml_backend_dev_get_props(dev, &props);
425+
main_backend_mmap = props.caps.buffer_from_host_ptr;
426+
}
427+
}
428+
}
429+
430+
// split definition to avoid msvc choking on the extra parameter handling
431+
auto get_param_tensors_p = [&](auto&& model, bool force_cpu, const char* prefix) {
432+
std::map<std::string, ggml_tensor*> temp;
433+
model->get_param_tensors(temp, prefix);
434+
bool do_mmap = enable_mmap_tensors && (main_backend_mmap || force_cpu);
435+
for (const auto& [key, tensor] : temp) {
436+
tensors[key] = tensor;
437+
if (do_mmap) {
438+
mmap_able_tensors[key] = tensor;
439+
}
440+
}
441+
};
442+
443+
auto get_param_tensors = [&](auto&& model, bool force_cpu = false) {
444+
std::map<std::string, ggml_tensor*> temp;
445+
model->get_param_tensors(temp);
446+
bool do_mmap = enable_mmap_tensors && (main_backend_mmap || force_cpu);
447+
for (const auto& [key, tensor] : temp) {
448+
tensors[key] = tensor;
449+
if (do_mmap) {
450+
mmap_able_tensors[key] = tensor;
451+
}
452+
}
453+
};
454+
411455
if (sd_version_is_control(version)) {
412456
// Might need vae encode for control cond
413457
vae_decode_only = false;
@@ -514,7 +558,7 @@ class StableDiffusionGGML {
514558
clip_vision = std::make_shared<FrozenCLIPVisionEmbedder>(backend,
515559
offload_params_to_cpu,
516560
tensor_storage_map);
517-
clip_vision->get_param_tensors(tensors);
561+
get_param_tensors(clip_vision);
518562
}
519563
} else if (sd_version_is_qwen_image(version)) {
520564
bool enable_vision = false;
@@ -580,16 +624,16 @@ class StableDiffusionGGML {
580624
}
581625
}
582626

583-
cond_stage_model->get_param_tensors(tensors);
627+
get_param_tensors(cond_stage_model, clip_on_cpu);
584628

585-
diffusion_model->get_param_tensors(tensors);
629+
get_param_tensors(diffusion_model);
586630

587631
if (sd_version_is_unet_edit(version)) {
588632
vae_decode_only = false;
589633
}
590634

591635
if (high_noise_diffusion_model) {
592-
high_noise_diffusion_model->get_param_tensors(tensors);
636+
get_param_tensors(high_noise_diffusion_model);
593637
}
594638

595639
if (sd_ctx_params->keep_vae_on_cpu && !ggml_backend_is_cpu(backend)) {
@@ -652,6 +696,8 @@ class StableDiffusionGGML {
652696
}
653697
};
654698

699+
bool force_vae_cpu = sd_ctx_params->keep_vae_on_cpu;
700+
655701
if (version == VERSION_CHROMA_RADIANCE) {
656702
LOG_INFO("using FakeVAE");
657703
first_stage_model = std::make_shared<FakeVAE>(version,
@@ -660,15 +706,15 @@ class StableDiffusionGGML {
660706
} else if (use_tae && !tae_preview_only) {
661707
LOG_INFO("using TAE for encoding / decoding");
662708
first_stage_model = create_tae();
663-
first_stage_model->get_param_tensors(tensors, "tae");
709+
get_param_tensors_p(first_stage_model, force_vae_cpu, "tae");
664710
} else {
665711
LOG_INFO("using VAE for encoding / decoding");
666712
first_stage_model = create_vae();
667-
first_stage_model->get_param_tensors(tensors, "first_stage_model");
713+
get_param_tensors_p(first_stage_model, force_vae_cpu, "first_stage_model");
668714
if (use_tae && tae_preview_only) {
669715
LOG_INFO("using TAE for preview");
670716
preview_vae = create_tae();
671-
preview_vae->get_param_tensors(tensors, "tae");
717+
get_param_tensors_p(preview_vae, force_vae_cpu, "tae");  // REVIEW FIX: commit mistakenly passed first_stage_model here (copy-paste from the "first_stage_model" call above), leaving preview_vae's tensors unregistered
672718
}
673719
}
674720

@@ -733,7 +779,7 @@ class StableDiffusionGGML {
733779
}
734780
}
735781
if (use_pmid) {
736-
pmid_model->get_param_tensors(tensors, "pmid");
782+
get_param_tensors_p(pmid_model, false, "pmid");
737783
}
738784

739785
if (sd_ctx_params->flash_attn) {
@@ -810,13 +856,11 @@ class StableDiffusionGGML {
810856
ignore_tensors.insert("conditioner.embedders.3");
811857
}
812858

813-
if (sd_ctx_params->enable_mmap) {
814-
if (!(offload_params_to_cpu || ggml_backend_is_cpu(backend))) {
815-
LOG_DEBUG("cannot memory-map model weights: only supported with CPU or --offload-to-cpu");
816-
} else if (apply_lora_immediately) {
817-
LOG_DEBUG("cannot memory-map model weights: only supported with --lora-apply-mode at_runtime");
859+
if (enable_mmap_tensors) {
860+
if (mmap_able_tensors.empty()) {
861+
LOG_DEBUG("no tensors could be memory-mapped");
818862
} else {
819-
mmap_tensor_store = model_loader.mmap_tensors(tensors, ignore_tensors);
863+
mmap_tensor_store = model_loader.mmap_tensors(mmap_able_tensors, ignore_tensors);
820864
}
821865
}
822866

0 commit comments

Comments
 (0)