@@ -705,16 +705,11 @@ void ModelLoader::set_wtype_override(ggml_type wtype, std::string tensor_type_ru
705705 }
706706}
707707
708- bool ModelLoader::load_tensors (on_new_tensor_cb_t on_new_tensor_cb, int n_threads_p, bool enable_mmap) {
709- int64_t process_time_ms = 0 ;
710- std::atomic<int64_t > read_time_ms (0 );
711- std::atomic<int64_t > memcpy_time_ms (0 );
712- std::atomic<int64_t > copy_to_backend_time_ms (0 );
713- std::atomic<int64_t > convert_time_ms (0 );
714- std::atomic<uint64_t > bytes_processed (0 );
708+ void ModelLoader::process_model_files (bool enable_mmap) {
715709
716- int num_threads_to_use = n_threads_p > 0 ? n_threads_p : sd_get_num_physical_cores ();
717- LOG_DEBUG (" using %d threads for model loading" , num_threads_to_use);
710+ if (model_files_processed) {
711+ return ;
712+ }
718713
719714 int64_t start_time = ggml_time_ms ();
720715
@@ -726,22 +721,13 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
726721 processed_tensor_storages.push_back (tensor_storage);
727722 }
728723
729- process_time_ms = ggml_time_ms () - start_time;
730-
731- bool success = true ;
732- size_t total_tensors_processed = 0 ;
733- const size_t total_tensors_to_process = processed_tensor_storages.size ();
734- const int64_t t_start = ggml_time_ms ();
735- int last_n_threads = 1 ;
736-
737724 for (size_t file_index = 0 ; file_index < file_paths_.size (); file_index++) {
738725 std::string file_path = file_paths_[file_index];
739- LOG_DEBUG (" loading tensors from %s" , file_path.c_str ());
740726
741- std::vector<const TensorStorage* > file_tensors;
727+ std::vector<TensorStorage> file_tensors;
742728 for (const auto & ts : processed_tensor_storages) {
743729 if (ts.file_index == file_index) {
744- file_tensors.push_back (& ts);
730+ file_tensors.push_back (ts);
745731 }
746732 }
747733 if (file_tensors.empty ()) {
@@ -750,7 +736,7 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
750736
751737 bool is_zip = false ;
752738 for (auto const & ts : file_tensors) {
753- if (ts-> index_in_zip >= 0 ) {
739+ if (ts. index_in_zip >= 0 ) {
754740 is_zip = true ;
755741 break ;
756742 }
@@ -765,6 +751,58 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
765751 }
766752 }
767753
754+ ModelFileData fdata;
755+ fdata.path = file_path;
756+ fdata.mmapped = std::shared_ptr<MmapWrapper>(std::move (mmapped));
757+ fdata.tensors = std::move (file_tensors);
758+ fdata.is_zip = is_zip;
759+
760+ file_data.push_back (std::move (fdata));
761+ }
762+
763+ model_files_processed = true ;
764+
765+ int64_t end_time = ggml_time_ms ();
766+ int64_t process_time_ms = end_time - start_time;
767+
768+ LOG_INFO (" model files processing completed in %.2fs" , process_time_ms / 1000 .f );
769+ }
770+
771+ bool ModelLoader::load_tensors (on_new_tensor_cb_t on_new_tensor_cb, int n_threads_p, bool enable_mmap) {
772+
773+ process_model_files (enable_mmap);
774+
775+ std::atomic<int64_t > read_time_ms (0 );
776+ std::atomic<int64_t > memcpy_time_ms (0 );
777+ std::atomic<int64_t > copy_to_backend_time_ms (0 );
778+ std::atomic<int64_t > convert_time_ms (0 );
779+ std::atomic<uint64_t > bytes_processed (0 );
780+
781+ int num_threads_to_use = n_threads_p > 0 ? n_threads_p : sd_get_num_physical_cores ();
782+ LOG_DEBUG (" using %d threads for model loading" , num_threads_to_use);
783+
784+ int64_t start_time = ggml_time_ms ();
785+
786+ size_t total_tensors_to_process = 0 ;
787+ for (const auto & fdata : file_data) {
788+ total_tensors_to_process += fdata.tensors .size ();
789+ }
790+
791+ bool success = true ;
792+ size_t total_tensors_processed = 0 ;
793+ const int64_t t_start = start_time;
794+ int last_n_threads = 1 ;
795+
796+ for (auto & fdata : file_data) {
797+ const std::string & file_path = fdata.path ;
798+ LOG_DEBUG (" loading tensors from %s" , file_path.c_str ());
799+
800+ const std::vector<TensorStorage> & file_tensors = fdata.tensors ;
801+
802+ bool is_zip = fdata.is_zip ;
803+
804+ std::shared_ptr<MmapWrapper> mmapped = fdata.mmapped ;
805+
768806 int n_threads = is_zip ? 1 : std::min (num_threads_to_use, (int )file_tensors.size ());
769807 if (n_threads < 1 ) {
770808 n_threads = 1 ;
@@ -805,7 +843,7 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
805843 break ;
806844 }
807845
808- const TensorStorage& tensor_storage = * file_tensors[idx];
846+ const TensorStorage& tensor_storage = file_tensors[idx];
809847 ggml_tensor* dst_tensor = nullptr ;
810848
811849 t0 = ggml_time_ms ();
@@ -965,9 +1003,8 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
9651003 }
9661004
9671005 int64_t end_time = ggml_time_ms ();
968- LOG_INFO (" loading tensors completed, taking %.2fs (process: %.2fs, read: %.2fs, memcpy: %.2fs, convert: %.2fs, copy_to_backend: %.2fs)" ,
1006+ LOG_INFO (" loading tensors completed, taking %.2fs (read: %.2fs, memcpy: %.2fs, convert: %.2fs, copy_to_backend: %.2fs)" ,
9691007 (end_time - start_time) / 1000 .f ,
970- process_time_ms / 1000 .f ,
9711008 (read_time_ms.load () / (float )last_n_threads) / 1000 .f ,
9721009 (memcpy_time_ms.load () / (float )last_n_threads) / 1000 .f ,
9731010 (convert_time_ms.load () / (float )last_n_threads) / 1000 .f ,
0 commit comments