Thireus
diff --git a/ggml/src/ggml-cpu/vec.h
Lines changed: 91 additions & 5 deletions b/ggml/src/ggml-cpu/vec.h
Lines changed: 91 additions & 5 deletions
diff --git a/tests/test-thread-safety.cpp
Lines changed: 6 additions & 4 deletions b/tests/test-thread-safety.cpp
Lines changed: 6 additions & 4 deletions
diff --git a/tools/server/public/index.html.gz
4.38 KB b/tools/server/public/index.html.gz
4.38 KB
diff --git a/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions.svelte
Lines changed: 30 additions & 24 deletions b/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions.svelte
Lines changed: 30 additions & 24 deletions
@@ -77,16 +77,85 @@ inline static void ggml_vec_add_f16 (const int n, ggml_fp16_t * z, const ggml_fp
         z[i] = GGML_CPU_FP32_TO_FP16(GGML_CPU_FP16_TO_FP32(x[i]) + GGML_CPU_FP16_TO_FP32(y[i]));
     }
 }
-inline static void ggml_vec_add1_f32(const int n, float * z, const float * x, const float   v) { for (int i = 0; i < n; ++i) z[i]  = x[i] + v;    }
-inline static void ggml_vec_acc_f32 (const int n, float * y, const float * x)                  { for (int i = 0; i < n; ++i) y[i] += x[i];        }
-inline static void ggml_vec_acc1_f32(const int n, float * y, const float   v)                  { for (int i = 0; i < n; ++i) y[i] += v;           }
+inline static void ggml_vec_add1_f32(const int n, float * z, const float * x, const float v) { // z[i] = x[i] + v for every i in [0, n)
+    int i = 0; // shared by both loops below, so the scalar loop resumes where the SIMD loop stopped
+#if defined(GGML_SIMD)
+    const int np = (n & ~(GGML_F32_STEP - 1)); // n with its low bits cleared; a multiple of GGML_F32_STEP provided that constant is a power of two (defined elsewhere, TODO confirm)
+
+    GGML_F32_VEC vv = GGML_F32_VEC_SET1(v); // built once outside the loops; presumably v broadcast to every lane (macro defined elsewhere)
+
+    for (; i < np; i += GGML_F32_STEP) { // one chunk of GGML_F32_STEP floats per iteration
+        for (int j = 0; j < GGML_F32_ARR; ++j) { // GGML_F32_ARR vector operations per chunk, each offset by j*GGML_F32_EPR floats
+            GGML_F32_VEC ax = GGML_F32_VEC_LOAD(x + i + j*GGML_F32_EPR);
+            GGML_F32_VEC az = GGML_F32_VEC_ADD(ax, vv);
+            GGML_F32_VEC_STORE(z + i + j*GGML_F32_EPR, az); // stored at the same offset in z that was read from x
+        }
+    }
+#endif
+    for (; i < n; ++i) { // scalar tail for indices np..n-1; covers all n elements when GGML_SIMD is not defined
+        z[i] = x[i] + v;
+    }
+}
+inline static void ggml_vec_acc_f32 (const int n, float * y, const float * x) { // y[i] += x[i] for every i in [0, n); y is updated in place
+    int i = 0; // one index for both loops: the scalar loop continues from wherever the SIMD loop ends
+#if defined(GGML_SIMD)
+    const int np = (n & ~(GGML_F32_STEP - 1)); // element count handled by the SIMD loop; rounds n down assuming GGML_F32_STEP is a power of two (TODO confirm)
+
+    for (; i < np; i += GGML_F32_STEP) { // advance by GGML_F32_STEP floats per pass
+        for (int j = 0; j < GGML_F32_ARR; ++j) { // each j addresses the slice starting j*GGML_F32_EPR floats into the chunk
+            GGML_F32_VEC ay = GGML_F32_VEC_LOAD(y + i + j*GGML_F32_EPR); // current accumulator values
+            GGML_F32_VEC ax = GGML_F32_VEC_LOAD(x + i + j*GGML_F32_EPR);
+            ay = GGML_F32_VEC_ADD(ay, ax);
+            GGML_F32_VEC_STORE(y + i + j*GGML_F32_EPR, ay); // written back to the offset it was loaded from
+        }
+    }
+#endif
+    for (; i < n; ++i) { // remaining elements, or every element when GGML_SIMD is not defined
+        y[i] += x[i];
+    }
+}
+inline static void ggml_vec_acc1_f32(const int n, float * y, const float v) { // y[i] += v for every i in [0, n); y is updated in place
+    int i = 0; // declared outside the #if so the scalar loop picks up after the SIMD loop
+#if defined(GGML_SIMD)
+    const int np = (n & ~(GGML_F32_STEP - 1)); // upper bound of the SIMD loop: n masked down, which is a multiple of GGML_F32_STEP if that is a power of two (TODO confirm)
+
+    GGML_F32_VEC vv = GGML_F32_VEC_SET1(v); // loop-invariant vector built from v; presumably a broadcast (macro defined elsewhere)
+
+    for (; i < np; i += GGML_F32_STEP) { // GGML_F32_STEP floats per outer iteration
+        for (int j = 0; j < GGML_F32_ARR; ++j) { // GGML_F32_ARR load/add/store triples per outer iteration
+            GGML_F32_VEC ay = GGML_F32_VEC_LOAD(y + i + j*GGML_F32_EPR);
+            ay = GGML_F32_VEC_ADD(ay, vv);
+            GGML_F32_VEC_STORE(y + i + j*GGML_F32_EPR, ay); // same address as the load above
+        }
+    }
+#endif
+    for (; i < n; ++i) { // scalar remainder; the only loop that runs when GGML_SIMD is not defined
+        y[i] += v;
+    }
+}
 inline static void ggml_vec_sub_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i]  = x[i] - y[i]; }
 inline static void ggml_vec_sub_f16 (const int n, ggml_fp16_t * z, const ggml_fp16_t * x, const ggml_fp16_t * y) {
     for (int i = 0; i < n; ++i) {
         z[i] = GGML_CPU_FP32_TO_FP16(GGML_CPU_FP16_TO_FP32(x[i]) - GGML_CPU_FP16_TO_FP32(y[i]));
     }
 }
-inline static void ggml_vec_set_f32 (const int n, float * x, const float   v)                  { for (int i = 0; i < n; ++i) x[i]  = v;           }
+inline static void ggml_vec_set_f32 (const int n, float * x, const float v) { // x[i] = v for every i in [0, n)
+    int i = 0; // carried from the SIMD loop into the scalar loop
+#if defined(GGML_SIMD)
+    const int np = (n & ~(GGML_F32_STEP - 1)); // n with the low bits masked off; equals the largest multiple of GGML_F32_STEP not above n when that constant is a power of two (TODO confirm)
+
+    GGML_F32_VEC vx = GGML_F32_VEC_SET1(v); // fill pattern created once; presumably v in every lane (macro defined elsewhere)
+
+    for (; i < np; i += GGML_F32_STEP) { // store-only loop: nothing is read from x
+        for (int j = 0; j < GGML_F32_ARR; ++j) { // GGML_F32_ARR stores per chunk, GGML_F32_EPR floats apart
+            GGML_F32_VEC_STORE(x + i + j*GGML_F32_EPR, vx);
+        }
+    }
+#endif
+    for (; i < n; ++i) { // fills whatever the SIMD loop did not reach; all of x when GGML_SIMD is not defined
+        x[i] = v;
+    }
+}
 inline static void ggml_vec_cpy_f32 (const int n, float * y, const float * x)                  { for (int i = 0; i < n; ++i) y[i]  = x[i];        }
 inline static void ggml_vec_neg_f32 (const int n, float * y, const float * x)                  { for (int i = 0; i < n; ++i) y[i]  = -x[i];       }
 inline static void ggml_vec_neg_f16 (const int n, ggml_fp16_t * y, const ggml_fp16_t * x) {
@@ -95,7 +164,24 @@ inline static void ggml_vec_neg_f16 (const int n, ggml_fp16_t * y, const ggml_fp
     }
 }
 
-inline static void ggml_vec_mul_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i]  = x[i]*y[i];   }
+inline static void ggml_vec_mul_f32 (const int n, float * z, const float * x, const float * y) { // z[i] = x[i]*y[i] for every i in [0, n)
+    int i = 0; // single cursor used by the SIMD loop and then by the scalar loop
+#if defined(GGML_SIMD)
+    const int np = (n & ~(GGML_F32_STEP - 1)); // SIMD loop bound: n masked down to a multiple of GGML_F32_STEP, assuming a power-of-two step (TODO confirm)
+
+    for (; i < np; i += GGML_F32_STEP) { // consumes GGML_F32_STEP floats from each input per iteration
+        for (int j = 0; j < GGML_F32_ARR; ++j) { // j selects the slice at offset j*GGML_F32_EPR within the chunk
+            GGML_F32_VEC ax = GGML_F32_VEC_LOAD(x + i + j*GGML_F32_EPR);
+            GGML_F32_VEC ay = GGML_F32_VEC_LOAD(y + i + j*GGML_F32_EPR);
+            GGML_F32_VEC az = GGML_F32_VEC_MUL(ax, ay);
+            GGML_F32_VEC_STORE(z + i + j*GGML_F32_EPR, az); // output offset matches the two input offsets
+        }
+    }
+#endif
+    for (; i < n; ++i) { // scalar tail; processes the whole range when GGML_SIMD is not defined
+        z[i] = x[i]*y[i];
+    }
+}
 inline static void ggml_vec_mul_f16 (const int n, ggml_fp16_t * z, const ggml_fp16_t * x, const ggml_fp16_t * y) {
     for (int i = 0; i < n; ++i) {
         z[i] = GGML_CPU_FP32_TO_FP16(GGML_CPU_FP16_TO_FP32(x[i]) * GGML_CPU_FP16_TO_FP32(y[i]));
 
@@ -3,6 +3,7 @@
 // - Creates n_parallel (--parallel) contexts per model
 // - Runs inference in parallel on each context
 
+#include <array>
 #include <thread>
 #include <vector>
 #include <atomic>
@@ -38,13 +39,14 @@ int main(int argc, char ** argv) {
     cparams.n_seq_max = 1;
 
     int dev_count = ggml_backend_dev_count();
-    int gpu_dev_count = 0;
+    std::vector<std::array<ggml_backend_dev_t, 2>> gpus;
     for (int i = 0; i < dev_count; ++i) {
         auto * dev = ggml_backend_dev_get(i);
         if (dev && ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_GPU) {
-            gpu_dev_count++;
+            gpus.push_back({dev, nullptr});
         }
     }
+    const int gpu_dev_count = (int)gpus.size();
     const int num_models = gpu_dev_count + 1 + 1; // GPUs + 1 CPU model + 1 layer split
     //const int num_models = std::max(1, gpu_dev_count);
     const int num_contexts = std::max(1, params.n_parallel);
@@ -58,12 +60,12 @@ int main(int argc, char ** argv) {
 
         if (m < gpu_dev_count) {
             mparams.split_mode = LLAMA_SPLIT_MODE_NONE;
-            mparams.main_gpu = m;
+            mparams.devices = gpus[m].data();
         } else if (m == gpu_dev_count) {
             mparams.split_mode = LLAMA_SPLIT_MODE_NONE;
             mparams.main_gpu = -1; // CPU model
         } else {
-            mparams.split_mode = LLAMA_SPLIT_MODE_LAYER;;
+            mparams.split_mode = LLAMA_SPLIT_MODE_LAYER;
         }
 
         llama_model * model = llama_model_load_from_file(params.model.path.c_str(), mparams);
 
@@ -3,6 +3,8 @@
 	import { Button } from '$lib/components/ui/button';
 	import ChatFormActionFileAttachments from './ChatFormActionFileAttachments.svelte';
 	import ChatFormActionRecord from './ChatFormActionRecord.svelte';
+	import ChatFormModelSelector from './ChatFormModelSelector.svelte';
+	import { config } from '$lib/stores/settings.svelte';
 	import type { FileTypeCategory } from '$lib/enums/files';
 
 	interface Props {
@@ -26,32 +28,36 @@
 		onMicClick,
 		onStop
 	}: Props = $props();
+
+	let currentConfig = $derived(config());
 </script>
 
-<div class="flex items-center justify-between gap-1 {className}">
-	<ChatFormActionFileAttachments {disabled} {onFileUpload} />
+<div class="flex w-full items-center gap-2 {className}">
+	<ChatFormActionFileAttachments class="mr-auto" {disabled} {onFileUpload} />
+
+	{#if currentConfig.modelSelectorEnabled}
+		<ChatFormModelSelector class="shrink-0" />
+	{/if}
 
-	<div class="flex gap-2">
-		{#if isLoading}
-			<Button
-				type="button"
-				onclick={onStop}
-				class="h-8 w-8 bg-transparent p-0 hover:bg-destructive/20"
-			>
-				<span class="sr-only">Stop</span>
-				<Square class="h-8 w-8 fill-destructive stroke-destructive" />
-			</Button>
-		{:else}
-			<ChatFormActionRecord {disabled} {isLoading} {isRecording} {onMicClick} />
+	{#if isLoading}
+		<Button
+			type="button"
+			onclick={onStop}
+			class="h-8 w-8 bg-transparent p-0 hover:bg-destructive/20"
+		>
+			<span class="sr-only">Stop</span>
+			<Square class="h-8 w-8 fill-destructive stroke-destructive" />
+		</Button>
+	{:else}
+		<ChatFormActionRecord {disabled} {isLoading} {isRecording} {onMicClick} />
 
-			<Button
-				type="submit"
-				disabled={!canSend || disabled || isLoading}
-				class="h-8 w-8 rounded-full p-0"
-			>
-				<span class="sr-only">Send</span>
-				<ArrowUp class="h-12 w-12" />
-			</Button>
-		{/if}
-	</div>
+		<Button
+			type="submit"
+			disabled={!canSend || disabled || isLoading}
+			class="h-8 w-8 rounded-full p-0"
+		>
+			<span class="sr-only">Send</span>
+			<ArrowUp class="h-12 w-12" />
+		</Button>
+	{/if}
 </div>