
Commit 676b186

update stable-diffusion.cpp to master-0ebe6fe
1 parent 93ab734 commit 676b186

13 files changed: +521 -823 lines

otherarch/sdcpp/ggml_extend.hpp

Lines changed: 8 additions & 38 deletions
@@ -194,20 +194,10 @@ __STATIC_INLINE__ float sd_image_get_f32(sd_image_t image, int iw, int ih, int i
     return value;
 }
 
-#if 0 // kcpp
-static struct ggml_tensor* get_tensor_from_graph(struct ggml_cgraph* gf, const char* name) {
-    struct ggml_tensor* res = NULL;
-    for (int i = 0; i < ggml_graph_n_nodes(gf); i++) {
-        struct ggml_tensor* node = ggml_graph_node(gf, i);
-        // printf("%d, %s \n", i, ggml_get_name(node));
-        if (strcmp(ggml_get_name(node), name) == 0) {
-            res = node;
-            break;
-        }
-    }
-    return res;
+__STATIC_INLINE__ float sd_image_get_f32(sd_image_f32_t image, int iw, int ih, int ic) {
+    float value = *(image.data + ih * image.width * image.channel + iw * image.channel + ic);
+    return value;
 }
-#endif
 
 __STATIC_INLINE__ void print_ggml_tensor(struct ggml_tensor* tensor, bool shape_only = false, const char* mark = "") {
     printf("%s (%s): shape(%zu, %zu, %zu, %zu)\n", mark, ggml_type_name(tensor->type), tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3]);
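
For orientation, the sd_image_get_f32 overload added above addresses row-major, channel-interleaved (HWC) float data: offset = ih * width * channels + iw * channels + ic. A self-contained sketch of the same indexing with a hypothetical 2x2 RGB buffer (the struct and helper names below are illustrative, not from the codebase):

#include <cstdio>

// Minimal stand-in for sd_image_f32_t: HWC-interleaved float pixels.
struct image_f32 {
    int width, height, channel;
    const float* data;
};

// Same addressing as the sd_image_get_f32 overload added in the hunk above.
static float get_f32(const image_f32& img, int iw, int ih, int ic) {
    return img.data[ih * img.width * img.channel + iw * img.channel + ic];
}

int main() {
    const float pixels[2 * 2 * 3] = {
        0.0f, 0.1f, 0.2f, 1.0f, 1.1f, 1.2f,   // row 0: two RGB pixels
        2.0f, 2.1f, 2.2f, 3.0f, 3.1f, 3.2f};  // row 1: two RGB pixels
    image_f32 img{2, 2, 3, pixels};
    printf("%.1f\n", get_f32(img, 1, 1, 2));  // pixel (x=1, y=1), channel 2 -> 3.2
}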
@@ -462,28 +452,6 @@ __STATIC_INLINE__ void sd_apply_mask(struct ggml_tensor* image_data,
     }
 }
 
-__STATIC_INLINE__ void sd_mul_images_to_tensor(const uint8_t* image_data,
-                                               struct ggml_tensor* output,
-                                               int idx,
-                                               float* mean = NULL,
-                                               float* std = NULL) {
-    int64_t width = output->ne[0];
-    int64_t height = output->ne[1];
-    int64_t channels = output->ne[2];
-    GGML_ASSERT(channels == 3 && output->type == GGML_TYPE_F32);
-    for (int iy = 0; iy < height; iy++) {
-        for (int ix = 0; ix < width; ix++) {
-            for (int k = 0; k < channels; k++) {
-                int value = *(image_data + iy * width * channels + ix * channels + k);
-                float pixel_val = value / 255.0f;
-                if (mean != NULL && std != NULL)
-                    pixel_val = (pixel_val - mean[k]) / std[k];
-                ggml_tensor_set_f32(output, pixel_val, ix, iy, k, idx);
-            }
-        }
-    }
-}
-
 __STATIC_INLINE__ void sd_image_f32_to_tensor(const float* image_data,
                                               struct ggml_tensor* output,
                                               bool scale = true) {
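
The removed sd_mul_images_to_tensor was the u8-to-f32 path: scale each 8-bit channel sample to [0, 1], then optionally apply per-channel (x - mean) / std. A freestanding sketch of just that per-sample arithmetic (the helper name and the CLIP-style example stats are illustrative, not taken from the diff):

#include <cstdint>
#include <cstdio>

// Per-sample conversion as in the removed helper: scale to [0, 1],
// then optionally normalize channel k with (x - mean[k]) / std[k].
static float normalize_u8(uint8_t value, const float* mean, const float* std_, int k) {
    float pixel_val = value / 255.0f;
    if (mean != NULL && std_ != NULL)
        pixel_val = (pixel_val - mean[k]) / std_[k];
    return pixel_val;
}

int main() {
    // CLIP-style per-channel stats, used here only as example inputs.
    const float mean[3] = {0.48145466f, 0.4578275f, 0.40821073f};
    const float std_[3] = {0.26862954f, 0.26130258f, 0.27577711f};
    printf("%f\n", normalize_u8(128, NULL, NULL, 0));  // 0.501961: plain [0, 1] scaling
    printf("%f\n", normalize_u8(128, mean, std_, 0));  // ~0.076: normalized red channel
}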
@@ -786,9 +754,11 @@ __STATIC_INLINE__ std::vector<struct ggml_tensor*> ggml_chunk(struct ggml_contex
 
 typedef std::function<void(ggml_tensor*, ggml_tensor*, bool)> on_tile_process;
 
-__STATIC_INLINE__ void
-sd_tiling_calc_tiles(int &num_tiles_dim, float& tile_overlap_factor_dim, int small_dim, int tile_size, const float tile_overlap_factor) {
-
+__STATIC_INLINE__ void sd_tiling_calc_tiles(int& num_tiles_dim,
+                                            float& tile_overlap_factor_dim,
+                                            int small_dim,
+                                            int tile_size,
+                                            const float tile_overlap_factor) {
     int tile_overlap = (tile_size * tile_overlap_factor);
     int non_tile_overlap = tile_size - tile_overlap;
 
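This hunk only reflows the signature; the body still starts from the two quantities shown, the per-edge overlap in pixels and the stride between tile origins. A toy calculation to make those concrete (the tile-count formula is a plausible ceiling-division reading, labeled as such since the rest of the function is not in this hunk):

#include <cstdio>

int main() {
    // Example inputs, not taken from the diff: a 96-wide dimension,
    // 32-wide tiles, 50% overlap between neighboring tiles.
    int dim = 96;
    int tile_size = 32;
    const float tile_overlap_factor = 0.5f;

    // The two statements visible in the hunk above:
    int tile_overlap = (tile_size * tile_overlap_factor);  // 16 px shared by neighbors
    int non_tile_overlap = tile_size - tile_overlap;       // 16 px stride between tile starts

    // One plausible way to count covering tiles from those quantities
    // (assumption: ceiling division; the function body is not shown here):
    int num_tiles = (dim - tile_overlap + non_tile_overlap - 1) / non_tile_overlap;
    printf("%d tiles, stride %d\n", num_tiles, non_tile_overlap);  // 5 tiles, stride 16
}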
otherarch/sdcpp/lora.hpp

Lines changed: 37 additions & 24 deletions
@@ -1,6 +1,7 @@
 #ifndef __LORA_HPP__
 #define __LORA_HPP__
 
+#include <mutex>
 #include "ggml_extend.hpp"
 
 #define LORA_GRAPH_BASE_SIZE 10240
@@ -115,49 +116,61 @@ struct LoraModel : public GGMLRunner {
         return "lora";
     }
 
-    bool load_from_file(bool filter_tensor = false) {
+    bool load_from_file(bool filter_tensor = false, int n_threads = 0) {
         LOG_INFO("loading LoRA from '%s'", file_path.c_str());
 
         if (load_failed) {
             LOG_ERROR("init lora model loader from file failed: '%s'", file_path.c_str());
             return false;
         }
 
+        std::unordered_map<std::string, TensorStorage> tensors_to_create;
+        std::mutex lora_mutex;
         bool dry_run = true;
         auto on_new_tensor_cb = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) -> bool {
-            const std::string& name = tensor_storage.name;
+            if (dry_run) {
+                const std::string& name = tensor_storage.name;
 
-            if (filter_tensor && !contains(name, "lora")) {
-                // LOG_INFO("skipping LoRA tesnor '%s'", name.c_str());
-                return true;
-            }
-            // LOG_INFO("lora_tensor %s", name.c_str());
-            for (int i = 0; i < LORA_TYPE_COUNT; i++) {
-                if (name.find(type_fingerprints[i]) != std::string::npos) {
-                    type = (lora_t)i;
-                    break;
+                if (filter_tensor && !contains(name, "lora")) {
+                    return true;
                 }
-            }
 
-            if (dry_run) {
-                struct ggml_tensor* real = ggml_new_tensor(params_ctx,
-                                                           tensor_storage.type,
-                                                           tensor_storage.n_dims,
-                                                           tensor_storage.ne);
-                lora_tensors[name] = real;
+                {
+                    std::lock_guard<std::mutex> lock(lora_mutex);
+                    for (int i = 0; i < LORA_TYPE_COUNT; i++) {
+                        if (name.find(type_fingerprints[i]) != std::string::npos) {
+                            type = (lora_t)i;
+                            break;
+                        }
+                    }
+                    tensors_to_create[name] = tensor_storage;
+                }
             } else {
-                auto real = lora_tensors[name];
-                *dst_tensor = real;
+                const std::string& name = tensor_storage.name;
+                auto iter = lora_tensors.find(name);
+                if (iter != lora_tensors.end()) {
+                    *dst_tensor = iter->second;
+                }
             }
-
             return true;
         };
 
-        model_loader.load_tensors(on_new_tensor_cb);
+        model_loader.load_tensors(on_new_tensor_cb, n_threads);
+
+        for (const auto& pair : tensors_to_create) {
+            const auto& name = pair.first;
+            const auto& ts = pair.second;
+            struct ggml_tensor* real = ggml_new_tensor(params_ctx,
+                                                       ts.type,
+                                                       ts.n_dims,
+                                                       ts.ne);
+            lora_tensors[name] = real;
+        }
+
         alloc_params_buffer();
-        // exit(0);
+
         dry_run = false;
-        model_loader.load_tensors(on_new_tensor_cb);
+        model_loader.load_tensors(on_new_tensor_cb, n_threads);
 
         LOG_DEBUG("lora type: \"%s\"/\"%s\"", lora_downs[type].c_str(), lora_ups[type].c_str());
 
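Taken together, the rewrite keeps the two-pass load (a dry run that only records tensor metadata, then a second pass that fills real tensors) but makes the dry run safe for the new multi-threaded model_loader.load_tensors(on_new_tensor_cb, n_threads): shared state (the lora type detection and tensors_to_create) is mutated under lora_mutex, and the ggml_new_tensor calls move out of the callback into a single-threaded loop between the two passes. A stripped-down sketch of that pattern with stand-in types (nothing below is the sd.cpp API):

#include <atomic>
#include <cstdio>
#include <cstdlib>
#include <functional>
#include <mutex>
#include <string>
#include <thread>
#include <unordered_map>
#include <utility>
#include <vector>

// Stand-ins for the real loader types; illustrative only.
struct TensorMeta {
    size_t nbytes = 0;
};
using OnTensor = std::function<void(const std::string&, const TensorMeta&)>;

// Pretend multi-threaded loader: like model_loader.load_tensors(cb, n_threads),
// it may invoke the callback from several threads at once.
static void load_tensors(const std::vector<std::pair<std::string, TensorMeta>>& file,
                         const OnTensor& cb,
                         int n_threads) {
    std::atomic<size_t> next{0};
    std::vector<std::thread> pool;
    for (int t = 0; t < n_threads; t++) {
        pool.emplace_back([&] {
            for (size_t i = next++; i < file.size(); i = next++) {
                cb(file[i].first, file[i].second);
            }
        });
    }
    for (auto& th : pool) {
        th.join();
    }
}

int main() {
    std::vector<std::pair<std::string, TensorMeta>> file = {
        {"lora.up.weight", {1024}}, {"lora.down.weight", {1024}}, {"alpha", {4}}};

    std::unordered_map<std::string, TensorMeta> tensors_to_create;  // filled in pass 1
    std::unordered_map<std::string, void*> tensors;                 // "allocated" in between
    std::mutex mtx;
    bool dry_run = true;

    OnTensor cb = [&](const std::string& name, const TensorMeta& meta) {
        if (dry_run) {
            // Pass 1: record metadata only. The map is shared across the
            // loader's worker threads, so guard every mutation.
            std::lock_guard<std::mutex> lock(mtx);
            tensors_to_create[name] = meta;
        } else {
            // Pass 2: read-only lookup into a map built single-threaded,
            // so no lock is needed here.
            auto it = tensors.find(name);
            if (it != tensors.end()) { /* copy tensor data into it->second */ }
        }
    };

    load_tensors(file, cb, 4);          // dry run: collect names and sizes
    for (auto& kv : tensors_to_create)  // single-threaded: create the tensors
        tensors[kv.first] = malloc(kv.second.nbytes);
    dry_run = false;
    load_tensors(file, cb, 4);          // real run: fill the created tensors

    printf("created %zu tensors\n", tensors.size());  // prints: created 3 tensors
    for (auto& kv : tensors) free(kv.second);
}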