Skip to content

Commit 831ea3c

Browse files
committed
Work on shader type generation
1 parent 6355137 commit 831ea3c

File tree

1 file changed

+58
-83
lines changed

1 file changed

+58
-83
lines changed

ggml/src/ggml-webgpu/ggml-webgpu.cpp

Lines changed: 58 additions & 83 deletions
Original file line number | Diff line number | Diff line change
@@ -235,15 +235,6 @@ static void ggml_webgpu_create_buffer(wgpu::Device & device,
235235

236236
/** End WebGPU object initializations */
237237

238-
/** Utility Functions */
239-
240-
size_t ggml_webgpu_binding_size(ggml_tensor * t, size_t misalignment) {
241-
return (ggml_nbytes(t) + misalignment + WEBGPU_STORAGE_BUF_BINDING_MULT - 1) &
242-
~(WEBGPU_STORAGE_BUF_BINDING_MULT - 1);
243-
}
244-
245-
/** End Utility Functions */
246-
247238
/** WebGPU Actions */
248239

249240
// Wait for the queue to finish processing all submitted work
@@ -438,15 +429,6 @@ static void ggml_backend_webgpu_buffer_memset(webgpu_context & ctx,
438429
ggml_backend_webgpu_build_and_enqueue(ctx, ctx->memset_pipeline, params, entries, wg_x, true);
439430
}
440431

441-
static size_t ggml_backend_webgpu_tensor_offset(const ggml_tensor * tensor) {
442-
return webgpu_tensor_offset(tensor) + tensor->view_offs;
443-
}
444-
445-
static wgpu::Buffer ggml_backend_webgpu_tensor_buf(const ggml_tensor * tensor) {
446-
ggml_backend_webgpu_buffer_context * ctx = (ggml_backend_webgpu_buffer_context *) tensor->buffer->context;
447-
return ctx->buffer;
448-
}
449-
450432
/** End WebGPU Actions */
451433

452434
/** GGML Backend Interface */
@@ -464,19 +446,35 @@ static void ggml_backend_webgpu_free(ggml_backend_t backend) {
464446
GGML_UNUSED(ctx);
465447
}
466448

449+
static size_t ggml_webgpu_tensor_offset(const ggml_tensor * tensor) {
450+
return webgpu_tensor_offset(tensor) + tensor->view_offs;
451+
}
452+
453+
// Returns the wgpu::Buffer stored in the buffer context attached to `tensor`.
static wgpu::Buffer ggml_webgpu_tensor_buf(const ggml_tensor * tensor) {
    // tensor->buffer->context is interpreted as the WebGPU buffer context.
    return ((ggml_backend_webgpu_buffer_context *) tensor->buffer->context)->buffer;
}
457+
// Returns the low bits of the tensor's byte offset, i.e. how many bytes the
// offset lies past the previous multiple of
// ctx->limits.minStorageBufferOffsetAlignment.
// Assumes the alignment limit is a power of two.
static size_t ggml_webgpu_tensor_misalignment(webgpu_context & ctx, ggml_tensor * t) {
    const auto low_bits_mask = ctx->limits.minStorageBufferOffsetAlignment - 1;
    return ggml_webgpu_tensor_offset(t) & low_bits_mask;
}
461+
462+
// Returns the tensor's byte offset rounded down to a multiple of
// ctx->limits.minStorageBufferOffsetAlignment.
//
// ctx: WebGPU context providing the device limits.
// t:   tensor whose offset is being aligned.
//
// Assumes the alignment limit is a power of two.
static size_t ggml_webgpu_tensor_align_offset(webgpu_context & ctx, ggml_tensor * t) {
    // Widen the alignment to size_t *before* building the mask. The WebGPU
    // headers declare this limit as a 32-bit value (NOTE(review): confirm
    // against the webgpu.h / Dawn version in use); inverting it at 32-bit width
    // and then zero-extending it for the AND would clear the upper 32 bits of
    // any offset >= 4 GiB.
    const size_t alignment = ctx->limits.minStorageBufferOffsetAlignment;
    const size_t offset    = ggml_webgpu_tensor_offset(t);
    return offset & ~(alignment - 1);
}
466+
467+
// Returns the size of the tensor's data (ggml_nbytes) plus its offset
// misalignment, rounded up to a multiple of WEBGPU_STORAGE_BUF_BINDING_MULT.
static size_t ggml_webgpu_tensor_binding_size(webgpu_context & ctx, ggml_tensor * t) {
    // Add (multiple - 1) first so that masking off the low bits rounds up.
    const size_t padded_bytes =
        ggml_nbytes(t) + ggml_webgpu_tensor_misalignment(ctx, t) + WEBGPU_STORAGE_BUF_BINDING_MULT - 1;
    return padded_bytes & ~(WEBGPU_STORAGE_BUF_BINDING_MULT - 1);
}
471+
467472
static void ggml_webgpu_cpy(webgpu_context & ctx, ggml_tensor * src, ggml_tensor * dst) {
468-
size_t src_offset = ggml_backend_webgpu_tensor_offset(src);
469-
// assumes power of 2 offset alignment
470-
size_t src_misalignment = src_offset & (ctx->limits.minStorageBufferOffsetAlignment - 1);
471-
// align to minimum offset alignment
472-
src_offset &= ~(ctx->limits.minStorageBufferOffsetAlignment - 1);
473-
size_t dst_offset = ggml_backend_webgpu_tensor_offset(dst);
474-
size_t dst_misalignment = dst_offset & (ctx->limits.minStorageBufferOffsetAlignment - 1);
475-
dst_offset &= ~(ctx->limits.minStorageBufferOffsetAlignment - 1);
476-
uint32_t ne = (uint32_t) ggml_nelements(dst);
473+
uint32_t ne = (uint32_t) ggml_nelements(dst);
474+
477475
std::vector<uint32_t> params = { ne,
478-
(uint32_t) (src_misalignment / ggml_type_size(src->type)),
479-
(uint32_t) (dst_misalignment / ggml_type_size(dst->type)),
476+
(uint32_t) (ggml_webgpu_tensor_misalignment(ctx, src) / ggml_type_size(src->type)),
477+
(uint32_t) (ggml_webgpu_tensor_misalignment(ctx, dst) / ggml_type_size(dst->type)),
480478
// Convert byte-strides to element-strides
481479
(uint32_t) (src->nb[0] / ggml_type_size(src->type)),
482480
(uint32_t) (src->nb[1] / ggml_type_size(src->type)),
@@ -494,13 +492,13 @@ static void ggml_webgpu_cpy(webgpu_context & ctx, ggml_tensor * src, ggml_tensor
494492

495493
std::vector<wgpu::BindGroupEntry> entries = {
496494
{ .binding = 0,
497-
.buffer = ggml_backend_webgpu_tensor_buf(src),
498-
.offset = src_offset,
499-
.size = ggml_webgpu_binding_size(src, src_misalignment) },
495+
.buffer = ggml_webgpu_tensor_buf(src),
496+
.offset = ggml_webgpu_tensor_align_offset(ctx, src),
497+
.size = ggml_webgpu_tensor_binding_size(ctx, src) },
500498
{ .binding = 1,
501-
.buffer = ggml_backend_webgpu_tensor_buf(dst),
502-
.offset = dst_offset,
503-
.size = ggml_webgpu_binding_size(dst, dst_misalignment) }
499+
.buffer = ggml_webgpu_tensor_buf(dst),
500+
.offset = ggml_webgpu_tensor_align_offset(ctx, dst),
501+
.size = ggml_webgpu_tensor_binding_size(ctx, dst) }
504502
};
505503

506504
size_t max_wg_size = ctx->limits.maxComputeWorkgroupSizeX;
@@ -519,21 +517,9 @@ static void ggml_webgpu_set_rows(webgpu_context & ctx, ggml_tensor * src, ggml_t
519517
error_bufs.host_buf.Unmap();
520518
}
521519

522-
size_t src_offset = ggml_backend_webgpu_tensor_offset(src);
523-
// assumes power of 2 offset alignment
524-
size_t src_misalignment = src_offset & (ctx->limits.minStorageBufferOffsetAlignment - 1);
525-
// align to minimum offset alignment
526-
src_offset &= ~(ctx->limits.minStorageBufferOffsetAlignment - 1);
527-
size_t idx_offset = ggml_backend_webgpu_tensor_offset(idx);
528-
size_t idx_misalignment = idx_offset & (ctx->limits.minStorageBufferOffsetAlignment - 1);
529-
idx_offset &= ~(ctx->limits.minStorageBufferOffsetAlignment - 1);
530-
size_t dst_offset = ggml_backend_webgpu_tensor_offset(dst);
531-
size_t dst_misalignment = dst_offset & (ctx->limits.minStorageBufferOffsetAlignment - 1);
532-
dst_offset &= ~(ctx->limits.minStorageBufferOffsetAlignment - 1);
533-
534-
std::vector<uint32_t> params = { (uint32_t) (src_misalignment / ggml_type_size(src->type)),
535-
(uint32_t) (idx_misalignment / ggml_type_size(idx->type)),
536-
(uint32_t) (dst_misalignment / ggml_type_size(dst->type)),
520+
std::vector<uint32_t> params = { (uint32_t) (ggml_webgpu_tensor_misalignment(ctx, src) / ggml_type_size(src->type)),
521+
(uint32_t) (ggml_webgpu_tensor_misalignment(ctx, idx) / ggml_type_size(idx->type)),
522+
(uint32_t) (ggml_webgpu_tensor_misalignment(ctx, dst) / ggml_type_size(dst->type)),
537523
// Convert byte-strides to element-strides
538524
(uint32_t) (src->nb[1] / ggml_type_size(src->type)),
539525
(uint32_t) (src->nb[2] / ggml_type_size(src->type)),
@@ -555,18 +541,18 @@ static void ggml_webgpu_set_rows(webgpu_context & ctx, ggml_tensor * src, ggml_t
555541

556542
std::vector<wgpu::BindGroupEntry> entries = {
557543
{ .binding = 0,
558-
.buffer = ggml_backend_webgpu_tensor_buf(src),
559-
.offset = ggml_backend_webgpu_tensor_offset(src),
560-
.size = ggml_webgpu_binding_size(src, src_misalignment) },
544+
.buffer = ggml_webgpu_tensor_buf(src),
545+
.offset = ggml_webgpu_tensor_align_offset(ctx, src),
546+
.size = ggml_webgpu_tensor_binding_size(ctx, src) },
561547
{ .binding = 1,
562-
.buffer = ggml_backend_webgpu_tensor_buf(idx),
563-
.offset = ggml_backend_webgpu_tensor_offset(idx),
564-
.size = ggml_webgpu_binding_size(idx, idx_misalignment) },
548+
.buffer = ggml_webgpu_tensor_buf(idx),
549+
.offset = ggml_webgpu_tensor_align_offset(ctx, idx),
550+
.size = ggml_webgpu_tensor_binding_size(ctx, idx) },
565551
{ .binding = 2,
566-
.buffer = ggml_backend_webgpu_tensor_buf(dst),
567-
.offset = ggml_backend_webgpu_tensor_offset(dst),
568-
.size = ggml_webgpu_binding_size(dst, dst_misalignment) },
569-
{ .binding = 3, .buffer = error_bufs.dev_buf, .offset = 0, .size = error_bufs.dev_buf.GetSize() }
552+
.buffer = ggml_webgpu_tensor_buf(dst),
553+
.offset = ggml_webgpu_tensor_align_offset(ctx, dst),
554+
.size = ggml_webgpu_tensor_binding_size(ctx, dst) },
555+
{ .binding = 3, .buffer = error_bufs.dev_buf, .offset = 0, .size = error_bufs.dev_buf.GetSize() }
570556
};
571557

572558
size_t max_wg_size = ctx->limits.maxComputeWorkgroupSizeX;
@@ -579,21 +565,10 @@ static void ggml_webgpu_set_rows(webgpu_context & ctx, ggml_tensor * src, ggml_t
579565
}
580566

581567
static void ggml_webgpu_mul_mat(webgpu_context & ctx, ggml_tensor * src0, ggml_tensor * src1, ggml_tensor * dst) {
582-
size_t src0_offset = ggml_backend_webgpu_tensor_offset(src0);
583-
size_t src0_misalignment = src0_offset & (ctx->limits.minStorageBufferOffsetAlignment - 1);
584-
// align to minimum offset alignment
585-
src0_offset &= ~(ctx->limits.minStorageBufferOffsetAlignment - 1);
586-
size_t src1_offset = ggml_backend_webgpu_tensor_offset(src1);
587-
size_t src1_misalignment = src1_offset & (ctx->limits.minStorageBufferOffsetAlignment - 1);
588-
src1_offset &= ~(ctx->limits.minStorageBufferOffsetAlignment - 1);
589-
size_t dst_offset = ggml_backend_webgpu_tensor_offset(dst);
590-
size_t dst_misalignment = dst_offset & (ctx->limits.minStorageBufferOffsetAlignment - 1);
591-
dst_offset &= ~(ctx->limits.minStorageBufferOffsetAlignment - 1);
592-
593568
std::vector<uint32_t> params = {
594-
(uint32_t) (src0_misalignment / ggml_type_size(src0->type)),
595-
(uint32_t) (src1_misalignment / ggml_type_size(src1->type)),
596-
(uint32_t) (dst_misalignment / ggml_type_size(dst->type)),
569+
(uint32_t) (ggml_webgpu_tensor_misalignment(ctx, src0) / ggml_type_size(src0->type)),
570+
(uint32_t) (ggml_webgpu_tensor_misalignment(ctx, src1) / ggml_type_size(src1->type)),
571+
(uint32_t) (ggml_webgpu_tensor_misalignment(ctx, dst) / ggml_type_size(dst->type)),
597572
(uint32_t) dst->ne[1], // number of rows in result (M)
598573
(uint32_t) dst->ne[0], // number of columns in result (N)
599574
(uint32_t) src0->ne[0], // number of columns in src0/src1 (K)
@@ -611,17 +586,17 @@ static void ggml_webgpu_mul_mat(webgpu_context & ctx, ggml_tensor * src0, ggml_t
611586

612587
std::vector<wgpu::BindGroupEntry> entries = {
613588
{ .binding = 0,
614-
.buffer = ggml_backend_webgpu_tensor_buf(src0),
615-
.offset = ggml_backend_webgpu_tensor_offset(src0),
616-
.size = ggml_webgpu_binding_size(src0, src0_misalignment) },
589+
.buffer = ggml_webgpu_tensor_buf(src0),
590+
.offset = ggml_webgpu_tensor_align_offset(ctx, src0),
591+
.size = ggml_webgpu_tensor_binding_size(ctx, src0) },
617592
{ .binding = 1,
618-
.buffer = ggml_backend_webgpu_tensor_buf(src1),
619-
.offset = ggml_backend_webgpu_tensor_offset(src1),
620-
.size = ggml_webgpu_binding_size(src1, src1_misalignment) },
593+
.buffer = ggml_webgpu_tensor_buf(src1),
594+
.offset = ggml_webgpu_tensor_align_offset(ctx, src1),
595+
.size = ggml_webgpu_tensor_binding_size(ctx, src1) },
621596
{ .binding = 2,
622-
.buffer = ggml_backend_webgpu_tensor_buf(dst),
623-
.offset = ggml_backend_webgpu_tensor_offset(dst),
624-
.size = ggml_webgpu_binding_size(dst, dst_misalignment) }
597+
.buffer = ggml_webgpu_tensor_buf(dst),
598+
.offset = ggml_webgpu_tensor_align_offset(ctx, dst),
599+
.size = ggml_webgpu_tensor_binding_size(ctx, dst) }
625600
};
626601

627602
uint32_t wg_x =

0 commit comments

Comments
 (0)