Skip to content

Commit 97190f6

Browse files
committed
feat: enable memory-mapped tensors with --lora-apply-mode immediately
In this case, instead of disabling mmap, we turn the mapping writable.
1 parent db5305b commit 97190f6

5 files changed

Lines changed: 34 additions & 25 deletions

File tree

src/model.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -706,7 +706,7 @@ void ModelLoader::set_wtype_override(ggml_type wtype, std::string tensor_type_ru
706706
}
707707
}
708708

709-
void ModelLoader::process_model_files(bool enable_mmap) {
709+
void ModelLoader::process_model_files(bool enable_mmap, bool writable_mmap) {
710710

711711
if (model_files_processed) {
712712
return;
@@ -746,7 +746,7 @@ void ModelLoader::process_model_files(bool enable_mmap) {
746746
std::unique_ptr<MmapWrapper> mmapped;
747747
if (enable_mmap && !is_zip) {
748748
LOG_DEBUG("using mmap for I/O");
749-
mmapped = MmapWrapper::create(file_path);
749+
mmapped = MmapWrapper::create(file_path, writable_mmap);
750750
if (!mmapped) {
751751
LOG_WARN("failed to memory-map '%s'", file_path.c_str());
752752
}
@@ -770,9 +770,9 @@ void ModelLoader::process_model_files(bool enable_mmap) {
770770
}
771771

772772
std::vector<MmapTensorStore> ModelLoader::mmap_tensors(std::map<std::string, ggml_tensor*>& tensors,
773-
std::set<std::string> ignore_tensors)
773+
std::set<std::string> ignore_tensors, bool writable_mmap)
774774
{
775-
process_model_files(true);
775+
process_model_files(true, writable_mmap);
776776

777777
std::vector<MmapTensorStore> result;
778778
uint64_t mapped_bytes = 0;
@@ -788,7 +788,7 @@ std::vector<MmapTensorStore> ModelLoader::mmap_tensors(std::map<std::string, ggm
788788
const std::vector<TensorStorage>& file_tensors = fdata.tensors;
789789
std::shared_ptr<MmapWrapper> mmapped = fdata.mmapped;
790790

791-
uint8_t * mmap_data = const_cast<uint8_t*>(mmapped->data());
791+
uint8_t * mmap_data = mmapped->writable_data();
792792

793793
ggml_backend_buffer_t buf_mmap = ggml_backend_cpu_buffer_from_ptr(mmap_data, mmapped->size());
794794
if (!buf_mmap) {
@@ -864,7 +864,7 @@ std::vector<MmapTensorStore> ModelLoader::mmap_tensors(std::map<std::string, ggm
864864

865865
bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_threads_p, bool enable_mmap) {
866866

867-
process_model_files(enable_mmap);
867+
process_model_files(enable_mmap, false);
868868

869869
std::atomic<int64_t> read_time_ms(0);
870870
std::atomic<int64_t> memcpy_time_ms(0);

src/model.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -232,9 +232,10 @@ class ModelLoader {
232232
std::map<ggml_type, uint32_t> get_vae_wtype_stat();
233233
String2TensorStorage& get_tensor_storage_map() { return tensor_storage_map; }
234234
void set_wtype_override(ggml_type wtype, std::string tensor_type_rules = "");
235-
void process_model_files(bool enable_mmap = false);
235+
void process_model_files(bool enable_mmap = false, bool writable_mmap = true);
236236
std::vector<MmapTensorStore> mmap_tensors(std::map<std::string, ggml_tensor*>& tensors,
237-
std::set<std::string> ignore_tensors = {});
237+
std::set<std::string> ignore_tensors = {},
238+
bool writable = true);
238239
bool load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_threads = 0, bool use_mmap = false);
239240
bool load_tensors(std::map<std::string, ggml_tensor*>& tensors,
240241
std::set<std::string> ignore_tensors = {},

src/stable-diffusion.cpp

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -414,19 +414,20 @@ class StableDiffusionGGML {
414414
std::map<std::string, ggml_tensor*> mmap_able_tensors;
415415
bool enable_mmap_tensors = false;
416416
bool main_backend_mmap = false;
417+
bool needs_writable_mmap = false;
417418
if (sd_ctx_params->enable_mmap) {
418419
if (apply_lora_immediately) {
419-
LOG_DEBUG("cannot memory-map model weights: only supported with --lora-apply-mode at_runtime");
420+
needs_writable_mmap = true;
421+
LOG_WARN("in mode 'immediately', LoRAs will cause extra memory usage with mmap");
422+
}
423+
enable_mmap_tensors = true;
424+
if (offload_params_to_cpu) {
425+
main_backend_mmap = true;
420426
} else {
421-
enable_mmap_tensors = true;
422-
if (offload_params_to_cpu) {
423-
main_backend_mmap = true;
424-
} else {
425-
ggml_backend_dev_t dev = ggml_backend_get_device(backend);
426-
struct ggml_backend_dev_props props;
427-
ggml_backend_dev_get_props(dev, &props);
428-
main_backend_mmap = props.caps.buffer_from_host_ptr;
429-
}
427+
ggml_backend_dev_t dev = ggml_backend_get_device(backend);
428+
struct ggml_backend_dev_props props;
429+
ggml_backend_dev_get_props(dev, &props);
430+
main_backend_mmap = props.caps.buffer_from_host_ptr;
430431
}
431432
}
432433

@@ -876,7 +877,7 @@ class StableDiffusionGGML {
876877
if (mmap_able_tensors.empty()) {
877878
LOG_DEBUG("no tensors could be memory-mapped");
878879
} else {
879-
mmap_tensor_store = model_loader.mmap_tensors(mmap_able_tensors, ignore_tensors);
880+
mmap_tensor_store = model_loader.mmap_tensors(mmap_able_tensors, ignore_tensors, needs_writable_mmap);
880881
}
881882
}
882883

src/util.cpp

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ class MmapWrapperImpl : public MmapWrapper {
111111
HANDLE hmapping_;
112112
};
113113

114-
std::unique_ptr<MmapWrapper> MmapWrapper::create(const std::string& filename) {
114+
std::unique_ptr<MmapWrapper> MmapWrapper::create(const std::string& filename, bool writable) {
115115
void* mapped_data = nullptr;
116116
size_t file_size = 0;
117117

@@ -136,14 +136,18 @@ std::unique_ptr<MmapWrapper> MmapWrapper::create(const std::string& filename) {
136136

137137
file_size = static_cast<size_t>(size.QuadPart);
138138

139-
HANDLE mapping_handle = CreateFileMapping(file_handle, NULL, PAGE_READONLY, 0, 0, NULL);
139+
DWORD page_prot = writable ? PAGE_WRITECOPY : PAGE_READONLY;
140+
141+
HANDLE mapping_handle = CreateFileMapping(file_handle, NULL, page_prot, 0, 0, NULL);
140142

141143
if (mapping_handle == NULL) {
142144
CloseHandle(file_handle);
143145
return nullptr;
144146
}
145147

146-
mapped_data = MapViewOfFile(mapping_handle, FILE_MAP_READ, 0, 0, file_size);
148+
DWORD view_access = writable ? FILE_MAP_COPY : FILE_MAP_READ;
149+
150+
mapped_data = MapViewOfFile(mapping_handle, view_access, 0, 0, file_size);
147151

148152
if (mapped_data == NULL) {
149153
CloseHandle(mapping_handle);
@@ -181,7 +185,7 @@ class MmapWrapperImpl : public MmapWrapper {
181185
}
182186
};
183187

184-
std::unique_ptr<MmapWrapper> MmapWrapper::create(const std::string& filename) {
188+
std::unique_ptr<MmapWrapper> MmapWrapper::create(const std::string& filename, bool writable) {
185189
int file_descriptor = open(filename.c_str(), O_RDONLY);
186190
if (file_descriptor == -1) {
187191
return nullptr;
@@ -203,7 +207,9 @@ std::unique_ptr<MmapWrapper> MmapWrapper::create(const std::string& filename) {
203207

204208
size_t file_size = sb.st_size;
205209

206-
void* mapped_data = mmap(NULL, file_size, PROT_READ, mmap_flags, file_descriptor, 0);
210+
int mmap_prot = PROT_READ | (writable ? PROT_WRITE : 0);
211+
212+
void* mapped_data = mmap(NULL, file_size, mmap_prot, mmap_flags, file_descriptor, 0);
207213

208214
close(file_descriptor);
209215

src/util.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ sd::Tensor<float> clip_preprocess(const sd::Tensor<float>& image, int target_wid
4141

4242
class MmapWrapper {
4343
public:
44-
static std::unique_ptr<MmapWrapper> create(const std::string& filename);
44+
static std::unique_ptr<MmapWrapper> create(const std::string& filename, bool writable = false);
4545

4646
virtual ~MmapWrapper() = default;
4747

@@ -51,6 +51,7 @@ class MmapWrapper {
5151
MmapWrapper& operator=(MmapWrapper&&) = delete;
5252

5353
const uint8_t* data() const { return static_cast<uint8_t*>(data_); }
54+
uint8_t* writable_data() { return static_cast<uint8_t*>(data_); }
5455
size_t size() const { return size_; }
5556
bool copy_data(void* buf, size_t n, size_t offset) const;
5657

0 commit comments

Comments
 (0)