Skip to content

Commit 4bc8939

Browse files
authored
Merge branch 'ggml-org:master' into master
2 parents 8c7a2ce + 9b9201f commit 4bc8939

File tree

24 files changed

+1052
-76
lines changed

24 files changed

+1052
-76
lines changed

ggml/src/ggml-cpu/vec.h

Lines changed: 91 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -77,16 +77,85 @@ inline static void ggml_vec_add_f16 (const int n, ggml_fp16_t * z, const ggml_fp
7777
z[i] = GGML_CPU_FP32_TO_FP16(GGML_CPU_FP16_TO_FP32(x[i]) + GGML_CPU_FP16_TO_FP32(y[i]));
7878
}
7979
}
80-
inline static void ggml_vec_add1_f32(const int n, float * z, const float * x, const float v) { for (int i = 0; i < n; ++i) z[i] = x[i] + v; }
81-
inline static void ggml_vec_acc_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] += x[i]; }
82-
inline static void ggml_vec_acc1_f32(const int n, float * y, const float v) { for (int i = 0; i < n; ++i) y[i] += v; }
80+
// Adds the scalar v to each of the first n elements of x and stores the result in z:
// z[i] = x[i] + v for i in [0, n).
// When GGML_SIMD is defined, a leading portion of the range is processed with the
// GGML_F32_VEC_* macros; whatever is left is handled by the scalar loop at the end.
inline static void ggml_vec_add1_f32(const int n, float * z, const float * x, const float v) {
81+
int i = 0;
82+
#if defined(GGML_SIMD)
83+
// n with the bits of (GGML_F32_STEP - 1) cleared; this is n rounded down to a multiple
// of GGML_F32_STEP provided GGML_F32_STEP is a power of two (assumed - TODO confirm)
const int np = (n & ~(GGML_F32_STEP - 1));
84+
85+
// v broadcast into a vector once, outside the loops
GGML_F32_VEC vv = GGML_F32_VEC_SET1(v);
86+
87+
for (; i < np; i += GGML_F32_STEP) {
88+
// GGML_F32_ARR vector operations per step, each offset by GGML_F32_EPR floats
for (int j = 0; j < GGML_F32_ARR; ++j) {
89+
GGML_F32_VEC ax = GGML_F32_VEC_LOAD(x + i + j*GGML_F32_EPR);
90+
GGML_F32_VEC az = GGML_F32_VEC_ADD(ax, vv);
91+
GGML_F32_VEC_STORE(z + i + j*GGML_F32_EPR, az);
92+
}
93+
}
94+
#endif
95+
// scalar tail: continues from wherever i stopped (from 0 when GGML_SIMD is not defined)
for (; i < n; ++i) {
96+
z[i] = x[i] + v;
97+
}
98+
}
99+
// Accumulates x into y in place: y[i] += x[i] for i in [0, n).
// When GGML_SIMD is defined, a leading portion of the range is processed with the
// GGML_F32_VEC_* macros; whatever is left is handled by the scalar loop at the end.
inline static void ggml_vec_acc_f32 (const int n, float * y, const float * x) {
100+
int i = 0;
101+
#if defined(GGML_SIMD)
102+
// n with the bits of (GGML_F32_STEP - 1) cleared; this is n rounded down to a multiple
// of GGML_F32_STEP provided GGML_F32_STEP is a power of two (assumed - TODO confirm)
const int np = (n & ~(GGML_F32_STEP - 1));
103+
104+
for (; i < np; i += GGML_F32_STEP) {
105+
// GGML_F32_ARR vector operations per step, each offset by GGML_F32_EPR floats
for (int j = 0; j < GGML_F32_ARR; ++j) {
106+
GGML_F32_VEC ay = GGML_F32_VEC_LOAD(y + i + j*GGML_F32_EPR);
107+
GGML_F32_VEC ax = GGML_F32_VEC_LOAD(x + i + j*GGML_F32_EPR);
108+
ay = GGML_F32_VEC_ADD(ay, ax);
109+
// the sum is written back to the same y location it was loaded from
GGML_F32_VEC_STORE(y + i + j*GGML_F32_EPR, ay);
110+
}
111+
}
112+
#endif
113+
// scalar tail: continues from wherever i stopped (from 0 when GGML_SIMD is not defined)
for (; i < n; ++i) {
114+
y[i] += x[i];
115+
}
116+
}
117+
// Adds the scalar v to every element of y in place: y[i] += v for i in [0, n).
// When GGML_SIMD is defined, a leading portion of the range is processed with the
// GGML_F32_VEC_* macros; whatever is left is handled by the scalar loop at the end.
inline static void ggml_vec_acc1_f32(const int n, float * y, const float v) {
118+
int i = 0;
119+
#if defined(GGML_SIMD)
120+
// n with the bits of (GGML_F32_STEP - 1) cleared; this is n rounded down to a multiple
// of GGML_F32_STEP provided GGML_F32_STEP is a power of two (assumed - TODO confirm)
const int np = (n & ~(GGML_F32_STEP - 1));
121+
122+
// v broadcast into a vector once, outside the loops
GGML_F32_VEC vv = GGML_F32_VEC_SET1(v);
123+
124+
for (; i < np; i += GGML_F32_STEP) {
125+
// GGML_F32_ARR vector operations per step, each offset by GGML_F32_EPR floats
for (int j = 0; j < GGML_F32_ARR; ++j) {
126+
GGML_F32_VEC ay = GGML_F32_VEC_LOAD(y + i + j*GGML_F32_EPR);
127+
ay = GGML_F32_VEC_ADD(ay, vv);
128+
GGML_F32_VEC_STORE(y + i + j*GGML_F32_EPR, ay);
129+
}
130+
}
131+
#endif
132+
// scalar tail: continues from wherever i stopped (from 0 when GGML_SIMD is not defined)
for (; i < n; ++i) {
133+
y[i] += v;
134+
}
135+
}
83136
// Element-wise difference: stores x[k] - y[k] into z[k] for every k in [0, n).
// Elements are visited in ascending index order; nothing is written when n <= 0.
inline static void ggml_vec_sub_f32 (const int n, float * z, const float * x, const float * y) {
    int k = 0;
    while (k < n) {
        z[k] = x[k] - y[k];
        ++k;
    }
}
84137
// Element-wise difference for ggml_fp16_t arrays: for each i in [0, n), x[i] and y[i] are
// passed through GGML_CPU_FP16_TO_FP32, subtracted, and the result is passed through
// GGML_CPU_FP32_TO_FP16 before being stored in z[i].
inline static void ggml_vec_sub_f16 (const int n, ggml_fp16_t * z, const ggml_fp16_t * x, const ggml_fp16_t * y) {
85138
for (int i = 0; i < n; ++i) {
86139
z[i] = GGML_CPU_FP32_TO_FP16(GGML_CPU_FP16_TO_FP32(x[i]) - GGML_CPU_FP16_TO_FP32(y[i]));
87140
}
88141
}
89-
inline static void ggml_vec_set_f32 (const int n, float * x, const float v) { for (int i = 0; i < n; ++i) x[i] = v; }
142+
// Fills the first n elements of x with the scalar v: x[i] = v for i in [0, n).
// When GGML_SIMD is defined, a leading portion of the range is written with
// GGML_F32_VEC_STORE; whatever is left is handled by the scalar loop at the end.
inline static void ggml_vec_set_f32 (const int n, float * x, const float v) {
143+
int i = 0;
144+
#if defined(GGML_SIMD)
145+
// n with the bits of (GGML_F32_STEP - 1) cleared; this is n rounded down to a multiple
// of GGML_F32_STEP provided GGML_F32_STEP is a power of two (assumed - TODO confirm)
const int np = (n & ~(GGML_F32_STEP - 1));
146+
147+
// v broadcast into a vector once; the same vector is stored repeatedly below
GGML_F32_VEC vx = GGML_F32_VEC_SET1(v);
148+
149+
for (; i < np; i += GGML_F32_STEP) {
150+
// GGML_F32_ARR stores per step, each offset by GGML_F32_EPR floats
for (int j = 0; j < GGML_F32_ARR; ++j) {
151+
GGML_F32_VEC_STORE(x + i + j*GGML_F32_EPR, vx);
152+
}
153+
}
154+
#endif
155+
// scalar tail: continues from wherever i stopped (from 0 when GGML_SIMD is not defined)
for (; i < n; ++i) {
156+
x[i] = v;
157+
}
158+
}
90159
// Copies the first n floats of x into y, one element at a time in ascending index order.
// Nothing is written when n <= 0.
inline static void ggml_vec_cpy_f32 (const int n, float * y, const float * x) {
    int k = 0;
    while (k < n) {
        y[k] = x[k];
        ++k;
    }
}
91160
// Element-wise negation: stores -x[k] into y[k] for every k in [0, n).
// Elements are visited in ascending index order; nothing is written when n <= 0.
inline static void ggml_vec_neg_f32 (const int n, float * y, const float * x) {
    int k = 0;
    while (k < n) {
        y[k] = -x[k];
        ++k;
    }
}
92161
inline static void ggml_vec_neg_f16 (const int n, ggml_fp16_t * y, const ggml_fp16_t * x) {
@@ -95,7 +164,24 @@ inline static void ggml_vec_neg_f16 (const int n, ggml_fp16_t * y, const ggml_fp
95164
}
96165
}
97166

98-
inline static void ggml_vec_mul_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i] = x[i]*y[i]; }
167+
// Element-wise product: z[i] = x[i]*y[i] for i in [0, n).
// When GGML_SIMD is defined, a leading portion of the range is processed with the
// GGML_F32_VEC_* macros; whatever is left is handled by the scalar loop at the end.
inline static void ggml_vec_mul_f32 (const int n, float * z, const float * x, const float * y) {
168+
int i = 0;
169+
#if defined(GGML_SIMD)
170+
// n with the bits of (GGML_F32_STEP - 1) cleared; this is n rounded down to a multiple
// of GGML_F32_STEP provided GGML_F32_STEP is a power of two (assumed - TODO confirm)
const int np = (n & ~(GGML_F32_STEP - 1));
171+
172+
for (; i < np; i += GGML_F32_STEP) {
173+
// GGML_F32_ARR vector operations per step, each offset by GGML_F32_EPR floats
for (int j = 0; j < GGML_F32_ARR; ++j) {
174+
GGML_F32_VEC ax = GGML_F32_VEC_LOAD(x + i + j*GGML_F32_EPR);
175+
GGML_F32_VEC ay = GGML_F32_VEC_LOAD(y + i + j*GGML_F32_EPR);
176+
GGML_F32_VEC az = GGML_F32_VEC_MUL(ax, ay);
177+
GGML_F32_VEC_STORE(z + i + j*GGML_F32_EPR, az);
178+
}
179+
}
180+
#endif
181+
// scalar tail: continues from wherever i stopped (from 0 when GGML_SIMD is not defined)
for (; i < n; ++i) {
182+
z[i] = x[i]*y[i];
183+
}
184+
}
99185
inline static void ggml_vec_mul_f16 (const int n, ggml_fp16_t * z, const ggml_fp16_t * x, const ggml_fp16_t * y) {
100186
for (int i = 0; i < n; ++i) {
101187
z[i] = GGML_CPU_FP32_TO_FP16(GGML_CPU_FP16_TO_FP32(x[i]) * GGML_CPU_FP16_TO_FP32(y[i]));

tests/test-thread-safety.cpp

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
// - Creates n_parallel (--parallel) contexts per model
44
// - Runs inference in parallel on each context
55

6+
#include <array>
67
#include <thread>
78
#include <vector>
89
#include <atomic>
@@ -38,13 +39,14 @@ int main(int argc, char ** argv) {
3839
cparams.n_seq_max = 1;
3940

4041
int dev_count = ggml_backend_dev_count();
41-
int gpu_dev_count = 0;
42+
std::vector<std::array<ggml_backend_dev_t, 2>> gpus;
4243
for (int i = 0; i < dev_count; ++i) {
4344
auto * dev = ggml_backend_dev_get(i);
4445
if (dev && ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_GPU) {
45-
gpu_dev_count++;
46+
gpus.push_back({dev, nullptr});
4647
}
4748
}
49+
const int gpu_dev_count = (int)gpus.size();
4850
const int num_models = gpu_dev_count + 1 + 1; // GPUs + 1 CPU model + 1 layer split
4951
//const int num_models = std::max(1, gpu_dev_count);
5052
const int num_contexts = std::max(1, params.n_parallel);
@@ -58,12 +60,12 @@ int main(int argc, char ** argv) {
5860

5961
if (m < gpu_dev_count) {
6062
mparams.split_mode = LLAMA_SPLIT_MODE_NONE;
61-
mparams.main_gpu = m;
63+
mparams.devices = gpus[m].data();
6264
} else if (m == gpu_dev_count) {
6365
mparams.split_mode = LLAMA_SPLIT_MODE_NONE;
6466
mparams.main_gpu = -1; // CPU model
6567
} else {
66-
mparams.split_mode = LLAMA_SPLIT_MODE_LAYER;;
68+
mparams.split_mode = LLAMA_SPLIT_MODE_LAYER;
6769
}
6870

6971
llama_model * model = llama_model_load_from_file(params.model.path.c_str(), mparams);

tools/server/public/index.html.gz

4.38 KB
Binary file not shown.

tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions.svelte

Lines changed: 30 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,8 @@
33
import { Button } from '$lib/components/ui/button';
44
import ChatFormActionFileAttachments from './ChatFormActionFileAttachments.svelte';
55
import ChatFormActionRecord from './ChatFormActionRecord.svelte';
6+
import ChatFormModelSelector from './ChatFormModelSelector.svelte';
7+
import { config } from '$lib/stores/settings.svelte';
68
import type { FileTypeCategory } from '$lib/enums/files';
79
810
interface Props {
@@ -26,32 +28,36 @@
2628
onMicClick,
2729
onStop
2830
}: Props = $props();
31+
32+
let currentConfig = $derived(config());
2933
</script>
3034

31-
<div class="flex items-center justify-between gap-1 {className}">
32-
<ChatFormActionFileAttachments {disabled} {onFileUpload} />
35+
<div class="flex w-full items-center gap-2 {className}">
36+
<ChatFormActionFileAttachments class="mr-auto" {disabled} {onFileUpload} />
37+
38+
{#if currentConfig.modelSelectorEnabled}
39+
<ChatFormModelSelector class="shrink-0" />
40+
{/if}
3341

34-
<div class="flex gap-2">
35-
{#if isLoading}
36-
<Button
37-
type="button"
38-
onclick={onStop}
39-
class="h-8 w-8 bg-transparent p-0 hover:bg-destructive/20"
40-
>
41-
<span class="sr-only">Stop</span>
42-
<Square class="h-8 w-8 fill-destructive stroke-destructive" />
43-
</Button>
44-
{:else}
45-
<ChatFormActionRecord {disabled} {isLoading} {isRecording} {onMicClick} />
42+
{#if isLoading}
43+
<Button
44+
type="button"
45+
onclick={onStop}
46+
class="h-8 w-8 bg-transparent p-0 hover:bg-destructive/20"
47+
>
48+
<span class="sr-only">Stop</span>
49+
<Square class="h-8 w-8 fill-destructive stroke-destructive" />
50+
</Button>
51+
{:else}
52+
<ChatFormActionRecord {disabled} {isLoading} {isRecording} {onMicClick} />
4653

47-
<Button
48-
type="submit"
49-
disabled={!canSend || disabled || isLoading}
50-
class="h-8 w-8 rounded-full p-0"
51-
>
52-
<span class="sr-only">Send</span>
53-
<ArrowUp class="h-12 w-12" />
54-
</Button>
55-
{/if}
56-
</div>
54+
<Button
55+
type="submit"
56+
disabled={!canSend || disabled || isLoading}
57+
class="h-8 w-8 rounded-full p-0"
58+
>
59+
<span class="sr-only">Send</span>
60+
<ArrowUp class="h-12 w-12" />
61+
</Button>
62+
{/if}
5763
</div>

0 commit comments

Comments
 (0)