50 changes: 50 additions & 0 deletions model.cpp
@@ -1966,6 +1966,16 @@ std::vector<TensorStorage> remove_duplicates(const std::vector<TensorStorage>& v
}

bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
int64_t process_time_ms = 0;
int64_t read_time_ms = 0;
int64_t memcpy_time_ms = 0;
int64_t copy_to_backend_time_ms = 0;
int64_t convert_time_ms = 0;

int64_t prev_time_ms = 0;
int64_t curr_time_ms = 0;
int64_t start_time = ggml_time_ms();
prev_time_ms = start_time;
std::vector<TensorStorage> processed_tensor_storages;
for (auto& tensor_storage : tensor_storages) {
// LOG_DEBUG("%s", name.c_str());
@@ -1978,6 +1988,9 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
}
std::vector<TensorStorage> dedup = remove_duplicates(processed_tensor_storages);
processed_tensor_storages = dedup;
curr_time_ms = ggml_time_ms();
process_time_ms = curr_time_ms - prev_time_ms;
prev_time_ms = curr_time_ms;

bool success = true;
for (size_t file_index = 0; file_index < file_paths_.size(); file_index++) {
@@ -2019,15 +2032,27 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
size_t entry_size = zip_entry_size(zip);
if (entry_size != n) {
read_buffer.resize(entry_size);
prev_time_ms = ggml_time_ms();
zip_entry_noallocread(zip, (void*)read_buffer.data(), entry_size);
curr_time_ms = ggml_time_ms();
read_time_ms += curr_time_ms - prev_time_ms;
prev_time_ms = curr_time_ms;
memcpy((void*)buf, (void*)(read_buffer.data() + tensor_storage.offset), n);
curr_time_ms = ggml_time_ms();
memcpy_time_ms += curr_time_ms - prev_time_ms;
} else {
prev_time_ms = ggml_time_ms();
zip_entry_noallocread(zip, (void*)buf, n);
curr_time_ms = ggml_time_ms();
read_time_ms += curr_time_ms - prev_time_ms;
}
zip_entry_close(zip);
} else {
prev_time_ms = ggml_time_ms();
file.seekg(tensor_storage.offset);
file.read(buf, n);
curr_time_ms = ggml_time_ms();
read_time_ms += curr_time_ms - prev_time_ms;
if (!file) {
LOG_ERROR("read tensor data failed: '%s'", file_path.c_str());
return false;
@@ -2072,6 +2097,7 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
read_data(tensor_storage, (char*)dst_tensor->data, nbytes_to_read);
}

prev_time_ms = ggml_time_ms();
if (tensor_storage.is_bf16) {
// inplace op
bf16_to_f32_vec((uint16_t*)dst_tensor->data, (float*)dst_tensor->data, tensor_storage.nelements());
@@ -2086,10 +2112,13 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
} else if (tensor_storage.is_i64) {
i64_to_i32_vec((int64_t*)read_buffer.data(), (int32_t*)dst_tensor->data, tensor_storage.nelements());
}
curr_time_ms = ggml_time_ms();
convert_time_ms += curr_time_ms - prev_time_ms;
} else {
read_buffer.resize(std::max(tensor_storage.nbytes(), tensor_storage.nbytes_to_read()));
read_data(tensor_storage, (char*)read_buffer.data(), nbytes_to_read);

prev_time_ms = ggml_time_ms();
if (tensor_storage.is_bf16) {
// inplace op
bf16_to_f32_vec((uint16_t*)read_buffer.data(), (float*)read_buffer.data(), tensor_storage.nelements());
@@ -2109,11 +2138,14 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {

convert_tensor((void*)read_buffer.data(), tensor_storage.type, dst_tensor->data,
dst_tensor->type, (int)tensor_storage.nelements() / (int)tensor_storage.ne[0], (int)tensor_storage.ne[0]);
curr_time_ms = ggml_time_ms();
convert_time_ms += curr_time_ms - prev_time_ms;
}
} else {
read_buffer.resize(std::max(tensor_storage.nbytes(), tensor_storage.nbytes_to_read()));
read_data(tensor_storage, (char*)read_buffer.data(), nbytes_to_read);

prev_time_ms = ggml_time_ms();
if (tensor_storage.is_bf16) {
// inplace op
bf16_to_f32_vec((uint16_t*)read_buffer.data(), (float*)read_buffer.data(), tensor_storage.nelements());
@@ -2133,14 +2165,24 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {

if (tensor_storage.type == dst_tensor->type) {
// copy to device memory
curr_time_ms = ggml_time_ms();
convert_time_ms += curr_time_ms - prev_time_ms;
prev_time_ms = curr_time_ms;
ggml_backend_tensor_set(dst_tensor, read_buffer.data(), 0, ggml_nbytes(dst_tensor));
curr_time_ms = ggml_time_ms();
copy_to_backend_time_ms += curr_time_ms - prev_time_ms;
} else {
// convert first, then copy to device memory
convert_buffer.resize(ggml_nbytes(dst_tensor));
convert_tensor((void*)read_buffer.data(), tensor_storage.type,
(void*)convert_buffer.data(), dst_tensor->type,
(int)tensor_storage.nelements() / (int)tensor_storage.ne[0], (int)tensor_storage.ne[0]);
curr_time_ms = ggml_time_ms();
convert_time_ms += curr_time_ms - prev_time_ms;
prev_time_ms = curr_time_ms;
ggml_backend_tensor_set(dst_tensor, convert_buffer.data(), 0, ggml_nbytes(dst_tensor));
curr_time_ms = ggml_time_ms();
copy_to_backend_time_ms += curr_time_ms - prev_time_ms;
}
}
++tensor_count;
@@ -2170,6 +2212,14 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
break;
}
}
int64_t end_time = ggml_time_ms();
LOG_INFO("loading tensors completed, taking %.2fs (process: %.2fs, read: %.2fs, memcpy: %.2fs, convert: %.2fs, copy_to_backend: %.2fs)",
(end_time - start_time) / 1000.f,
process_time_ms / 1000.f,
read_time_ms / 1000.f,
memcpy_time_ms / 1000.f,
convert_time_ms / 1000.f,
copy_to_backend_time_ms / 1000.f);
return success;
}
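The instrumentation above follows a simple stopwatch-accumulator pattern: stamp the clock before a phase, stamp it again after, add the delta to that phase's counter, and reuse the second stamp as the start of the next phase so no time is double-counted or lost between phases. Below is a minimal, self-contained sketch of that pattern, with std::chrono standing in for ggml_time_ms() and sleeps standing in for the real read/convert work; the counter names mirror the diff, everything else is illustrative.

// Sketch of the per-phase stopwatch-accumulator pattern used in load_tensors.
// std::chrono replaces ggml_time_ms(); sleeps replace the real work.
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <thread>

static int64_t time_ms() {
    using namespace std::chrono;
    return duration_cast<milliseconds>(steady_clock::now().time_since_epoch()).count();
}

int main() {
    int64_t read_time_ms    = 0;
    int64_t convert_time_ms = 0;
    int64_t start_time      = time_ms();
    int64_t prev_time_ms    = start_time;
    int64_t curr_time_ms    = 0;

    for (int i = 0; i < 3; i++) {
        // "read" phase: stamp before and after, accumulate the delta
        prev_time_ms = time_ms();
        std::this_thread::sleep_for(std::chrono::milliseconds(10));  // stand-in for I/O
        curr_time_ms = time_ms();
        read_time_ms += curr_time_ms - prev_time_ms;
        prev_time_ms = curr_time_ms;  // reuse the stamp so phases neither overlap nor gap

        // "convert" phase, measured from the previous stamp
        std::this_thread::sleep_for(std::chrono::milliseconds(5));   // stand-in for dtype conversion
        curr_time_ms = time_ms();
        convert_time_ms += curr_time_ms - prev_time_ms;
    }

    int64_t end_time = time_ms();
    printf("total: %.2fs (read: %.2fs, convert: %.2fs)\n",
           (end_time - start_time) / 1000.f,
           read_time_ms / 1000.f,
           convert_time_ms / 1000.f);
    return 0;
}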

6 changes: 0 additions & 6 deletions stable-diffusion.cpp
@@ -557,8 +557,6 @@ class StableDiffusionGGML {
// load weights
LOG_DEBUG("loading weights");

int64_t t0 = ggml_time_ms();

std::set<std::string> ignore_tensors;
tensors["alphas_cumprod"] = alphas_cumprod_tensor;
if (use_tiny_autoencoder) {
@@ -656,11 +654,7 @@ class StableDiffusionGGML {
ggml_backend_is_cpu(clip_backend) ? "RAM" : "VRAM");
}

int64_t t1 = ggml_time_ms();
LOG_INFO("loading model from '%s' completed, taking %.2fs", SAFE_STR(sd_ctx_params->model_path), (t1 - t0) * 1.0f / 1000);

// check is_using_v_parameterization_for_sd2

if (sd_version_is_sd2(version)) {
if (is_using_v_parameterization_for_sd2(ctx, sd_version_is_inpaint(version))) {
is_using_v_parameterization = true;
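The stable-diffusion.cpp change drops the coarse t0/t1 wall-clock measurement around weight loading, presumably superseded by the per-phase summary now logged at the end of ModelLoader::load_tensors. As a design note, not part of this PR: the manual prev/curr bookkeeping could also be packaged as an RAII guard that accumulates into a counter when it leaves scope. A minimal sketch, assuming std::chrono rather than ggml's timer:

// Not part of the PR: an RAII alternative to manual prev/curr stamps.
// The guard adds the elapsed milliseconds to its accumulator on destruction.
#include <chrono>
#include <cstdint>
#include <cstdio>

struct ScopedTimerMs {
    int64_t& acc;                                  // counter to accumulate into
    std::chrono::steady_clock::time_point t0;
    explicit ScopedTimerMs(int64_t& a) : acc(a), t0(std::chrono::steady_clock::now()) {}
    ~ScopedTimerMs() {
        using namespace std::chrono;
        acc += duration_cast<milliseconds>(steady_clock::now() - t0).count();
    }
};

int main() {
    int64_t read_time_ms = 0;
    for (int i = 0; i < 3; i++) {
        ScopedTimerMs guard(read_time_ms);         // times the rest of this block
        volatile long sink = 0;
        for (long j = 0; j < 1000000; j++) sink += j;  // stand-in for file.read()
    }
    printf("read: %.2fs\n", read_time_ms / 1000.f);
    return 0;
}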