Skip to content

Commit dd55478

Browse files
authored
Merge branch 'ggml-org:master' into master
2 parents d38d6a9 + 3526657 commit dd55478

File tree

4 files changed

+12
-4
lines changed

4 files changed

+12
-4
lines changed

ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,3 @@
1-
2-
31
#include <iostream>
42
#include <fstream>
53
#include <sstream>
@@ -22,6 +20,7 @@
2220
#include <sys/types.h>
2321

2422
#ifdef _WIN32
23+
#define NOMINMAX
2524
#include <windows.h>
2625
#include <direct.h> // For _mkdir on Windows
2726
#else
@@ -306,7 +305,7 @@ using compile_count_guard = std::unique_ptr<uint32_t, decltype(&decrement_compil
306305
compile_count_guard acquire_compile_slot() {
307306
// wait until fewer than N compiles are in progress.
308307
// 16 is an arbitrary limit, the goal is to avoid "failed to create pipe" errors.
309-
uint32_t N = 16;
308+
uint32_t N = std::max(1u, std::min(16u, std::thread::hardware_concurrency()));
310309
std::unique_lock<std::mutex> guard(compile_count_mutex);
311310
compile_count_cond.wait(guard, [N] { return compile_count < N; });
312311
compile_count++;

ggml/src/ggml-webgpu/ggml-webgpu.cpp

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -424,6 +424,7 @@ static void ggml_backend_webgpu_build_and_enqueue(webgpu_context &
424424
ctx->staged_param_bufs.push_back(params_bufs);
425425
if (ctx->staged_command_bufs.size() == WEBGPU_COMMAND_SUBMIT_BATCH_SIZE) {
426426
ggml_backend_webgpu_submit_queue(ctx);
427+
ggml_backend_webgpu_wait_on_submission(ctx);
427428
}
428429
}
429430
}
@@ -1060,6 +1061,9 @@ static bool ggml_webgpu_encode_node(webgpu_context ctx, ggml_tensor * node) {
10601061
case GGML_OP_SCALE:
10611062
ggml_webgpu_scale(ctx, src0, node);
10621063
break;
1064+
case GGML_OP_SOFT_MAX:
1065+
ggml_webgpu_soft_max(ctx, src0, src1, src2, node);
1066+
break;
10631067
default:
10641068
return false;
10651069
}
@@ -1806,6 +1810,9 @@ static bool ggml_backend_webgpu_device_supports_op(ggml_backend_dev_t dev, const
18061810
case GGML_OP_SCALE:
18071811
supports_op = op->type == GGML_TYPE_F32;
18081812
break;
1813+
case GGML_OP_SOFT_MAX:
1814+
supports_op = op->type == GGML_TYPE_F32;
1815+
break;
18091816
default:
18101817
break;
18111818
}
@@ -1949,6 +1956,7 @@ static ggml_backend_dev_t ggml_backend_webgpu_reg_get_device(ggml_backend_reg_t
19491956
ggml_webgpu_init_rope_pipeline(ctx);
19501957
ggml_webgpu_init_glu_pipeline(ctx);
19511958
ggml_webgpu_init_scale_pipeline(ctx);
1959+
ggml_webgpu_init_soft_max_pipeline(ctx);
19521960

19531961
#ifdef GGML_WEBGPU_DEBUG
19541962
// Initialize debug buffers

ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -84,7 +84,7 @@ fn main(@builtin(workgroup_id) wid: vec3<u32>,
8484
let i2 = i / params.ne1;
8585
let i1 = i % params.ne1;
8686
let i_src_row = params.offset_src + i3 * params.stride_src3 + i2 * params.stride_src2 + i1 * params.stride_src1;
87-
let i_dst_row = params.offset_src + i3 * params.stride_dst3 + i2 * params.stride_dst2 + i1 * params.stride_dst1;
87+
let i_dst_row = params.offset_dst + i3 * params.stride_dst3 + i2 * params.stride_dst2 + i1 * params.stride_dst1;
8888

8989
let elems = (params.ne0 + wg_size - 1) / wg_size;
9090

ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -300,6 +300,7 @@ fn main(@builtin(workgroup_id) wid: vec3<u32>,
300300
workgroupBarrier();
301301
}
302302
let row_max = scratch[0];
303+
workgroupBarrier();
303304

304305
var sum = 0.0f;
305306
col = lid.x;

0 commit comments

Comments
 (0)