diff --git a/model.cpp b/model.cpp
index 1be057158..4e42018c8 100644
--- a/model.cpp
+++ b/model.cpp
@@ -1966,6 +1966,16 @@ std::vector<TensorStorage> remove_duplicates(const std::vector<TensorStorage>& v
 }
 
 bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
+    int64_t process_time_ms = 0;
+    int64_t read_time_ms = 0;
+    int64_t memcpy_time_ms = 0;
+    int64_t copy_to_backend_time_ms = 0;
+    int64_t convert_time_ms = 0;
+
+    int64_t prev_time_ms = 0;
+    int64_t curr_time_ms = 0;
+    int64_t start_time = ggml_time_ms();
+    prev_time_ms = start_time;
     std::vector<TensorStorage> processed_tensor_storages;
     for (auto& tensor_storage : tensor_storages) {
         // LOG_DEBUG("%s", name.c_str());
@@ -1978,6 +1988,9 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
     }
     std::vector<TensorStorage> dedup = remove_duplicates(processed_tensor_storages);
     processed_tensor_storages = dedup;
+    curr_time_ms = ggml_time_ms();
+    process_time_ms = curr_time_ms - prev_time_ms;
+    prev_time_ms = curr_time_ms;
 
     bool success = true;
     for (size_t file_index = 0; file_index < file_paths_.size(); file_index++) {
@@ -2019,15 +2032,27 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
                 size_t entry_size = zip_entry_size(zip);
                 if (entry_size != n) {
                     read_buffer.resize(entry_size);
+                    prev_time_ms = ggml_time_ms();
                     zip_entry_noallocread(zip, (void*)read_buffer.data(), entry_size);
+                    curr_time_ms = ggml_time_ms();
+                    read_time_ms += curr_time_ms - prev_time_ms;
+                    prev_time_ms = curr_time_ms;
                     memcpy((void*)buf, (void*)(read_buffer.data() + tensor_storage.offset), n);
+                    curr_time_ms = ggml_time_ms();
+                    memcpy_time_ms += curr_time_ms - prev_time_ms;
                 } else {
+                    prev_time_ms = ggml_time_ms();
                     zip_entry_noallocread(zip, (void*)buf, n);
+                    curr_time_ms = ggml_time_ms();
+                    read_time_ms += curr_time_ms - prev_time_ms;
                 }
                 zip_entry_close(zip);
             } else {
+                prev_time_ms = ggml_time_ms();
                 file.seekg(tensor_storage.offset);
                 file.read(buf, n);
+                curr_time_ms = ggml_time_ms();
+                read_time_ms += curr_time_ms - prev_time_ms;
                 if (!file) {
                     LOG_ERROR("read tensor data failed: '%s'", file_path.c_str());
                     return false;
@@ -2072,6 +2097,7 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
                     read_data(tensor_storage, (char*)dst_tensor->data, nbytes_to_read);
                 }
 
+                prev_time_ms = ggml_time_ms();
                 if (tensor_storage.is_bf16) {
                     // inplace op
                     bf16_to_f32_vec((uint16_t*)dst_tensor->data, (float*)dst_tensor->data, tensor_storage.nelements());
@@ -2086,10 +2112,13 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
                 } else if (tensor_storage.is_i64) {
                     i64_to_i32_vec((int64_t*)read_buffer.data(), (int32_t*)dst_tensor->data, tensor_storage.nelements());
                 }
+                curr_time_ms = ggml_time_ms();
+                convert_time_ms += curr_time_ms - prev_time_ms;
             } else {
                 read_buffer.resize(std::max(tensor_storage.nbytes(), tensor_storage.nbytes_to_read()));
                 read_data(tensor_storage, (char*)read_buffer.data(), nbytes_to_read);
 
+                prev_time_ms = ggml_time_ms();
                 if (tensor_storage.is_bf16) {
                     // inplace op
                     bf16_to_f32_vec((uint16_t*)read_buffer.data(), (float*)read_buffer.data(), tensor_storage.nelements());
@@ -2109,11 +2138,14 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
                 convert_tensor((void*)read_buffer.data(), tensor_storage.type, dst_tensor->data,
                                dst_tensor->type, (int)tensor_storage.nelements() / (int)tensor_storage.ne[0],
                                (int)tensor_storage.ne[0]);
+                curr_time_ms = ggml_time_ms();
+                convert_time_ms += curr_time_ms - prev_time_ms;
             }
         } else {
             read_buffer.resize(std::max(tensor_storage.nbytes(), tensor_storage.nbytes_to_read()));
             read_data(tensor_storage, (char*)read_buffer.data(), nbytes_to_read);
 
+            prev_time_ms = ggml_time_ms();
             if (tensor_storage.is_bf16) {
                 // inplace op
                 bf16_to_f32_vec((uint16_t*)read_buffer.data(), (float*)read_buffer.data(), tensor_storage.nelements());
@@ -2133,14 +2165,24 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
 
             if (tensor_storage.type == dst_tensor->type) {
                 // copy to device memory
+                curr_time_ms = ggml_time_ms();
+                convert_time_ms += curr_time_ms - prev_time_ms;
+                prev_time_ms = curr_time_ms;
                 ggml_backend_tensor_set(dst_tensor, read_buffer.data(), 0, ggml_nbytes(dst_tensor));
+                curr_time_ms = ggml_time_ms();
+                copy_to_backend_time_ms += curr_time_ms - prev_time_ms;
             } else {
                 // convert first, then copy to device memory
                 convert_buffer.resize(ggml_nbytes(dst_tensor));
                 convert_tensor((void*)read_buffer.data(), tensor_storage.type,
                                (void*)convert_buffer.data(), dst_tensor->type,
                                (int)tensor_storage.nelements() / (int)tensor_storage.ne[0], (int)tensor_storage.ne[0]);
+                curr_time_ms = ggml_time_ms();
+                convert_time_ms += curr_time_ms - prev_time_ms;
+                prev_time_ms = curr_time_ms;
                 ggml_backend_tensor_set(dst_tensor, convert_buffer.data(), 0, ggml_nbytes(dst_tensor));
+                curr_time_ms = ggml_time_ms();
+                copy_to_backend_time_ms += curr_time_ms - prev_time_ms;
             }
         }
         ++tensor_count;
@@ -2170,6 +2212,14 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
             break;
         }
     }
+    int64_t end_time = ggml_time_ms();
+    LOG_INFO("loading tensors completed, taking %.2fs (process: %.2fs, read: %.2fs, memcpy: %.2fs, convert: %.2fs, copy_to_backend: %.2fs)",
+             (end_time - start_time) / 1000.f,
+             process_time_ms / 1000.f,
+             read_time_ms / 1000.f,
+             memcpy_time_ms / 1000.f,
+             convert_time_ms / 1000.f,
+             copy_to_backend_time_ms / 1000.f);
     return success;
 }
 
diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp
index 64164a2fb..db234a1a8 100644
--- a/stable-diffusion.cpp
+++ b/stable-diffusion.cpp
@@ -557,8 +557,6 @@ class StableDiffusionGGML {
         // load weights
         LOG_DEBUG("loading weights");
 
-        int64_t t0 = ggml_time_ms();
-
         std::set<std::string> ignore_tensors;
         tensors["alphas_cumprod"] = alphas_cumprod_tensor;
         if (use_tiny_autoencoder) {
@@ -656,11 +654,7 @@ class StableDiffusionGGML {
                      ggml_backend_is_cpu(clip_backend) ? "RAM" : "VRAM");
         }
 
-        int64_t t1 = ggml_time_ms();
-        LOG_INFO("loading model from '%s' completed, taking %.2fs", SAFE_STR(sd_ctx_params->model_path), (t1 - t0) * 1.0f / 1000);
-
         // check is_using_v_parameterization_for_sd2
-
         if (sd_version_is_sd2(version)) {
             if (is_using_v_parameterization_for_sd2(ctx, sd_version_is_inpaint(version))) {
                 is_using_v_parameterization = true;
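
For reference, the patch instruments each phase with the same pattern: take a timestamp before and after the phase, add the delta to that phase's accumulator, and carry the timestamp forward so consecutive phases share one clock read. Below is a minimal standalone sketch of that pattern, not the patch itself: time_ms() is a stand-in built on std::chrono (the patch uses ggml_time_ms()), and the sleep calls merely simulate work in the "read" and "convert" phases.

// Minimal sketch of the accumulate-by-phase timing pattern.
// Assumptions: time_ms() approximates ggml_time_ms(); phases are simulated.
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <thread>

static int64_t time_ms() {
    using namespace std::chrono;
    return duration_cast<milliseconds>(steady_clock::now().time_since_epoch()).count();
}

int main() {
    int64_t read_time_ms    = 0;  // accumulator for the "read" phase
    int64_t convert_time_ms = 0;  // accumulator for the "convert" phase

    int64_t start_time   = time_ms();
    int64_t prev_time_ms = start_time;
    int64_t curr_time_ms = 0;

    for (int i = 0; i < 3; i++) {
        // "read" phase: everything between prev_time_ms and the next
        // clock read accrues to read_time_ms.
        std::this_thread::sleep_for(std::chrono::milliseconds(10));
        curr_time_ms = time_ms();
        read_time_ms += curr_time_ms - prev_time_ms;
        prev_time_ms = curr_time_ms;  // carry the timestamp into the next phase

        // "convert" phase: same pattern, different accumulator.
        std::this_thread::sleep_for(std::chrono::milliseconds(5));
        curr_time_ms = time_ms();
        convert_time_ms += curr_time_ms - prev_time_ms;
        prev_time_ms = curr_time_ms;
    }

    int64_t end_time = time_ms();
    printf("total: %.2fs (read: %.2fs, convert: %.2fs)\n",
           (end_time - start_time) / 1000.f,
           read_time_ms / 1000.f,
           convert_time_ms / 1000.f);
    return 0;
}

Because the per-phase totals are printed alongside the overall duration, the breakdown replaces the coarse t0/t1 log that the stable-diffusion.cpp hunks remove.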