Skip to content

Commit d3b89f0

Browse files
committed
refactor: split model file processing from tensor loading
1 parent d1706c1 commit d3b89f0

2 files changed

Lines changed: 73 additions & 24 deletions

File tree

src/model.cpp

Lines changed: 61 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -705,16 +705,11 @@ void ModelLoader::set_wtype_override(ggml_type wtype, std::string tensor_type_ru
705705
}
706706
}
707707

708-
bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_threads_p, bool enable_mmap) {
709-
int64_t process_time_ms = 0;
710-
std::atomic<int64_t> read_time_ms(0);
711-
std::atomic<int64_t> memcpy_time_ms(0);
712-
std::atomic<int64_t> copy_to_backend_time_ms(0);
713-
std::atomic<int64_t> convert_time_ms(0);
714-
std::atomic<uint64_t> bytes_processed(0);
708+
void ModelLoader::process_model_files(bool enable_mmap) {
715709

716-
int num_threads_to_use = n_threads_p > 0 ? n_threads_p : sd_get_num_physical_cores();
717-
LOG_DEBUG("using %d threads for model loading", num_threads_to_use);
710+
if (model_files_processed) {
711+
return;
712+
}
718713

719714
int64_t start_time = ggml_time_ms();
720715

@@ -726,22 +721,13 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
726721
processed_tensor_storages.push_back(tensor_storage);
727722
}
728723

729-
process_time_ms = ggml_time_ms() - start_time;
730-
731-
bool success = true;
732-
size_t total_tensors_processed = 0;
733-
const size_t total_tensors_to_process = processed_tensor_storages.size();
734-
const int64_t t_start = ggml_time_ms();
735-
int last_n_threads = 1;
736-
737724
for (size_t file_index = 0; file_index < file_paths_.size(); file_index++) {
738725
std::string file_path = file_paths_[file_index];
739-
LOG_DEBUG("loading tensors from %s", file_path.c_str());
740726

741-
std::vector<const TensorStorage*> file_tensors;
727+
std::vector<TensorStorage> file_tensors;
742728
for (const auto& ts : processed_tensor_storages) {
743729
if (ts.file_index == file_index) {
744-
file_tensors.push_back(&ts);
730+
file_tensors.push_back(ts);
745731
}
746732
}
747733
if (file_tensors.empty()) {
@@ -750,7 +736,7 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
750736

751737
bool is_zip = false;
752738
for (auto const& ts : file_tensors) {
753-
if (ts->index_in_zip >= 0) {
739+
if (ts.index_in_zip >= 0) {
754740
is_zip = true;
755741
break;
756742
}
@@ -765,6 +751,58 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
765751
}
766752
}
767753

754+
ModelFileData fdata;
755+
fdata.path = file_path;
756+
fdata.mmapped = std::shared_ptr<MmapWrapper>(std::move(mmapped));
757+
fdata.tensors = std::move(file_tensors);
758+
fdata.is_zip = is_zip;
759+
760+
file_data.push_back(std::move(fdata));
761+
}
762+
763+
model_files_processed = true;
764+
765+
int64_t end_time = ggml_time_ms();
766+
int64_t process_time_ms = end_time - start_time;
767+
768+
LOG_INFO("model files processing completed in %.2fs", process_time_ms / 1000.f);
769+
}
770+
771+
bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_threads_p, bool enable_mmap) {
772+
773+
process_model_files(enable_mmap);
774+
775+
std::atomic<int64_t> read_time_ms(0);
776+
std::atomic<int64_t> memcpy_time_ms(0);
777+
std::atomic<int64_t> copy_to_backend_time_ms(0);
778+
std::atomic<int64_t> convert_time_ms(0);
779+
std::atomic<uint64_t> bytes_processed(0);
780+
781+
int num_threads_to_use = n_threads_p > 0 ? n_threads_p : sd_get_num_physical_cores();
782+
LOG_DEBUG("using %d threads for model loading", num_threads_to_use);
783+
784+
int64_t start_time = ggml_time_ms();
785+
786+
size_t total_tensors_to_process = 0;
787+
for (const auto& fdata : file_data) {
788+
total_tensors_to_process += fdata.tensors.size();
789+
}
790+
791+
bool success = true;
792+
size_t total_tensors_processed = 0;
793+
const int64_t t_start = start_time;
794+
int last_n_threads = 1;
795+
796+
for (auto & fdata : file_data) {
797+
const std::string & file_path = fdata.path;
798+
LOG_DEBUG("loading tensors from %s", file_path.c_str());
799+
800+
const std::vector<TensorStorage> & file_tensors = fdata.tensors;
801+
802+
bool is_zip = fdata.is_zip;
803+
804+
std::shared_ptr<MmapWrapper> mmapped = fdata.mmapped;
805+
768806
int n_threads = is_zip ? 1 : std::min(num_threads_to_use, (int)file_tensors.size());
769807
if (n_threads < 1) {
770808
n_threads = 1;
@@ -805,7 +843,7 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
805843
break;
806844
}
807845

808-
const TensorStorage& tensor_storage = *file_tensors[idx];
846+
const TensorStorage& tensor_storage = file_tensors[idx];
809847
ggml_tensor* dst_tensor = nullptr;
810848

811849
t0 = ggml_time_ms();
@@ -965,9 +1003,8 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
9651003
}
9661004

9671005
int64_t end_time = ggml_time_ms();
968-
LOG_INFO("loading tensors completed, taking %.2fs (process: %.2fs, read: %.2fs, memcpy: %.2fs, convert: %.2fs, copy_to_backend: %.2fs)",
1006+
LOG_INFO("loading tensors completed, taking %.2fs (read: %.2fs, memcpy: %.2fs, convert: %.2fs, copy_to_backend: %.2fs)",
9691007
(end_time - start_time) / 1000.f,
970-
process_time_ms / 1000.f,
9711008
(read_time_ms.load() / (float)last_n_threads) / 1000.f,
9721009
(memcpy_time_ms.load() / (float)last_n_threads) / 1000.f,
9731010
(convert_time_ms.load() / (float)last_n_threads) / 1000.f,

src/model.h

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -190,10 +190,21 @@ enum PMVersion {
190190

191191
typedef OrderedMap<std::string, TensorStorage> String2TensorStorage;
192192

193+
class MmapWrapper;
194+
195+
struct ModelFileData {
196+
std::string path;
197+
std::vector<TensorStorage> tensors;
198+
std::shared_ptr<MmapWrapper> mmapped;
199+
bool is_zip;
200+
};
201+
193202
class ModelLoader {
194203
protected:
195204
SDVersion version_ = VERSION_COUNT;
196205
std::vector<std::string> file_paths_;
206+
std::vector<ModelFileData> file_data;
207+
bool model_files_processed = false;
197208
String2TensorStorage tensor_storage_map;
198209

199210
void add_tensor_storage(const TensorStorage& tensor_storage);
@@ -216,6 +227,7 @@ class ModelLoader {
216227
std::map<ggml_type, uint32_t> get_vae_wtype_stat();
217228
String2TensorStorage& get_tensor_storage_map() { return tensor_storage_map; }
218229
void set_wtype_override(ggml_type wtype, std::string tensor_type_rules = "");
230+
void process_model_files(bool enable_mmap = false);
219231
bool load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_threads = 0, bool use_mmap = false);
220232
bool load_tensors(std::map<std::string, ggml_tensor*>& tensors,
221233
std::set<std::string> ignore_tensors = {},

0 commit comments

Comments (0)