@@ -235,15 +235,6 @@ static void ggml_webgpu_create_buffer(wgpu::Device & device,
235235
236236/* * End WebGPU object initializations */
237237
238- /* * Utility Functions */
239-
240- size_t ggml_webgpu_binding_size (ggml_tensor * t, size_t misalignment) {
241- return (ggml_nbytes (t) + misalignment + WEBGPU_STORAGE_BUF_BINDING_MULT - 1 ) &
242- ~(WEBGPU_STORAGE_BUF_BINDING_MULT - 1 );
243- }
244-
245- /* * End Utility Functions */
246-
247238/* * WebGPU Actions */
248239
249240// Wait for the queue to finish processing all submitted work
@@ -438,15 +429,6 @@ static void ggml_backend_webgpu_buffer_memset(webgpu_context & ctx,
438429 ggml_backend_webgpu_build_and_enqueue (ctx, ctx->memset_pipeline , params, entries, wg_x, true );
439430}
440431
441- static size_t ggml_backend_webgpu_tensor_offset (const ggml_tensor * tensor) {
442- return webgpu_tensor_offset (tensor) + tensor->view_offs ;
443- }
444-
445- static wgpu::Buffer ggml_backend_webgpu_tensor_buf (const ggml_tensor * tensor) {
446- ggml_backend_webgpu_buffer_context * ctx = (ggml_backend_webgpu_buffer_context *) tensor->buffer ->context ;
447- return ctx->buffer ;
448- }
449-
450432/* * End WebGPU Actions */
451433
452434/* * GGML Backend Interface */
@@ -464,19 +446,35 @@ static void ggml_backend_webgpu_free(ggml_backend_t backend) {
464446 GGML_UNUSED (ctx);
465447}
466448
449+ static size_t ggml_webgpu_tensor_offset (const ggml_tensor * tensor) {
450+ return webgpu_tensor_offset (tensor) + tensor->view_offs ;
451+ }
452+
453+ static wgpu::Buffer ggml_webgpu_tensor_buf (const ggml_tensor * tensor) {
454+ ggml_backend_webgpu_buffer_context * ctx = (ggml_backend_webgpu_buffer_context *) tensor->buffer ->context ;
455+ return ctx->buffer ;
456+ }
457+ static size_t ggml_webgpu_tensor_misalignment (webgpu_context & ctx, ggml_tensor * t) {
458+ size_t offset = ggml_webgpu_tensor_offset (t);
459+ return offset & (ctx->limits .minStorageBufferOffsetAlignment - 1 );
460+ }
461+
462+ static size_t ggml_webgpu_tensor_align_offset (webgpu_context & ctx, ggml_tensor * t) {
463+ size_t offset = ggml_webgpu_tensor_offset (t);
464+ return offset & ~(ctx->limits .minStorageBufferOffsetAlignment - 1 );
465+ }
466+
467+ static size_t ggml_webgpu_tensor_binding_size (webgpu_context & ctx, ggml_tensor * t) {
468+ return (ggml_nbytes (t) + ggml_webgpu_tensor_misalignment (ctx, t) + WEBGPU_STORAGE_BUF_BINDING_MULT - 1 ) &
469+ ~(WEBGPU_STORAGE_BUF_BINDING_MULT - 1 );
470+ }
471+
467472static void ggml_webgpu_cpy (webgpu_context & ctx, ggml_tensor * src, ggml_tensor * dst) {
468- size_t src_offset = ggml_backend_webgpu_tensor_offset (src);
469- // assumes power of 2 offset alignment
470- size_t src_misalignment = src_offset & (ctx->limits .minStorageBufferOffsetAlignment - 1 );
471- // align to minimum offset alignment
472- src_offset &= ~(ctx->limits .minStorageBufferOffsetAlignment - 1 );
473- size_t dst_offset = ggml_backend_webgpu_tensor_offset (dst);
474- size_t dst_misalignment = dst_offset & (ctx->limits .minStorageBufferOffsetAlignment - 1 );
475- dst_offset &= ~(ctx->limits .minStorageBufferOffsetAlignment - 1 );
476- uint32_t ne = (uint32_t ) ggml_nelements (dst);
473+ uint32_t ne = (uint32_t ) ggml_nelements (dst);
474+
477475 std::vector<uint32_t > params = { ne,
478- (uint32_t ) (src_misalignment / ggml_type_size (src->type )),
479- (uint32_t ) (dst_misalignment / ggml_type_size (dst->type )),
476+ (uint32_t ) (ggml_webgpu_tensor_misalignment (ctx, src) / ggml_type_size (src->type )),
477+ (uint32_t ) (ggml_webgpu_tensor_misalignment (ctx, dst) / ggml_type_size (dst->type )),
480478 // Convert byte-strides to element-strides
481479 (uint32_t ) (src->nb [0 ] / ggml_type_size (src->type )),
482480 (uint32_t ) (src->nb [1 ] / ggml_type_size (src->type )),
@@ -494,13 +492,13 @@ static void ggml_webgpu_cpy(webgpu_context & ctx, ggml_tensor * src, ggml_tensor
494492
495493 std::vector<wgpu::BindGroupEntry> entries = {
496494 { .binding = 0 ,
497- .buffer = ggml_backend_webgpu_tensor_buf (src),
498- .offset = src_offset ,
499- .size = ggml_webgpu_binding_size (src, src_misalignment ) },
495+ .buffer = ggml_webgpu_tensor_buf (src),
496+ .offset = ggml_webgpu_tensor_align_offset (ctx, src) ,
497+ .size = ggml_webgpu_tensor_binding_size (ctx, src ) },
500498 { .binding = 1 ,
501- .buffer = ggml_backend_webgpu_tensor_buf (dst),
502- .offset = dst_offset ,
503- .size = ggml_webgpu_binding_size (dst, dst_misalignment ) }
499+ .buffer = ggml_webgpu_tensor_buf (dst),
500+ .offset = ggml_webgpu_tensor_align_offset (ctx, dst) ,
501+ .size = ggml_webgpu_tensor_binding_size (ctx, dst ) }
504502 };
505503
506504 size_t max_wg_size = ctx->limits .maxComputeWorkgroupSizeX ;
@@ -519,21 +517,9 @@ static void ggml_webgpu_set_rows(webgpu_context & ctx, ggml_tensor * src, ggml_t
519517 error_bufs.host_buf .Unmap ();
520518 }
521519
522- size_t src_offset = ggml_backend_webgpu_tensor_offset (src);
523- // assumes power of 2 offset alignment
524- size_t src_misalignment = src_offset & (ctx->limits .minStorageBufferOffsetAlignment - 1 );
525- // align to minimum offset alignment
526- src_offset &= ~(ctx->limits .minStorageBufferOffsetAlignment - 1 );
527- size_t idx_offset = ggml_backend_webgpu_tensor_offset (idx);
528- size_t idx_misalignment = idx_offset & (ctx->limits .minStorageBufferOffsetAlignment - 1 );
529- idx_offset &= ~(ctx->limits .minStorageBufferOffsetAlignment - 1 );
530- size_t dst_offset = ggml_backend_webgpu_tensor_offset (dst);
531- size_t dst_misalignment = dst_offset & (ctx->limits .minStorageBufferOffsetAlignment - 1 );
532- dst_offset &= ~(ctx->limits .minStorageBufferOffsetAlignment - 1 );
533-
534- std::vector<uint32_t > params = { (uint32_t ) (src_misalignment / ggml_type_size (src->type )),
535- (uint32_t ) (idx_misalignment / ggml_type_size (idx->type )),
536- (uint32_t ) (dst_misalignment / ggml_type_size (dst->type )),
520+ std::vector<uint32_t > params = { (uint32_t ) (ggml_webgpu_tensor_misalignment (ctx, src) / ggml_type_size (src->type )),
521+ (uint32_t ) (ggml_webgpu_tensor_misalignment (ctx, idx) / ggml_type_size (idx->type )),
522+ (uint32_t ) (ggml_webgpu_tensor_misalignment (ctx, dst) / ggml_type_size (dst->type )),
537523 // Convert byte-strides to element-strides
538524 (uint32_t ) (src->nb [1 ] / ggml_type_size (src->type )),
539525 (uint32_t ) (src->nb [2 ] / ggml_type_size (src->type )),
@@ -555,18 +541,18 @@ static void ggml_webgpu_set_rows(webgpu_context & ctx, ggml_tensor * src, ggml_t
555541
556542 std::vector<wgpu::BindGroupEntry> entries = {
557543 { .binding = 0 ,
558- .buffer = ggml_backend_webgpu_tensor_buf (src),
559- .offset = ggml_backend_webgpu_tensor_offset ( src),
560- .size = ggml_webgpu_binding_size (src, src_misalignment ) },
544+ .buffer = ggml_webgpu_tensor_buf (src),
545+ .offset = ggml_webgpu_tensor_align_offset (ctx, src),
546+ .size = ggml_webgpu_tensor_binding_size (ctx, src ) },
561547 { .binding = 1 ,
562- .buffer = ggml_backend_webgpu_tensor_buf (idx),
563- .offset = ggml_backend_webgpu_tensor_offset ( idx),
564- .size = ggml_webgpu_binding_size (idx, idx_misalignment ) },
548+ .buffer = ggml_webgpu_tensor_buf (idx),
549+ .offset = ggml_webgpu_tensor_align_offset (ctx, idx),
550+ .size = ggml_webgpu_tensor_binding_size (ctx, idx ) },
565551 { .binding = 2 ,
566- .buffer = ggml_backend_webgpu_tensor_buf (dst),
567- .offset = ggml_backend_webgpu_tensor_offset ( dst),
568- .size = ggml_webgpu_binding_size (dst, dst_misalignment ) },
569- { .binding = 3 , .buffer = error_bufs.dev_buf , .offset = 0 , .size = error_bufs.dev_buf .GetSize () }
552+ .buffer = ggml_webgpu_tensor_buf (dst),
553+ .offset = ggml_webgpu_tensor_align_offset (ctx, dst),
554+ .size = ggml_webgpu_tensor_binding_size (ctx, dst ) },
555+ { .binding = 3 , .buffer = error_bufs.dev_buf , .offset = 0 , .size = error_bufs.dev_buf .GetSize () }
570556 };
571557
572558 size_t max_wg_size = ctx->limits .maxComputeWorkgroupSizeX ;
@@ -579,21 +565,10 @@ static void ggml_webgpu_set_rows(webgpu_context & ctx, ggml_tensor * src, ggml_t
579565}
580566
581567static void ggml_webgpu_mul_mat (webgpu_context & ctx, ggml_tensor * src0, ggml_tensor * src1, ggml_tensor * dst) {
582- size_t src0_offset = ggml_backend_webgpu_tensor_offset (src0);
583- size_t src0_misalignment = src0_offset & (ctx->limits .minStorageBufferOffsetAlignment - 1 );
584- // align to minimum offset alignment
585- src0_offset &= ~(ctx->limits .minStorageBufferOffsetAlignment - 1 );
586- size_t src1_offset = ggml_backend_webgpu_tensor_offset (src1);
587- size_t src1_misalignment = src1_offset & (ctx->limits .minStorageBufferOffsetAlignment - 1 );
588- src1_offset &= ~(ctx->limits .minStorageBufferOffsetAlignment - 1 );
589- size_t dst_offset = ggml_backend_webgpu_tensor_offset (dst);
590- size_t dst_misalignment = dst_offset & (ctx->limits .minStorageBufferOffsetAlignment - 1 );
591- dst_offset &= ~(ctx->limits .minStorageBufferOffsetAlignment - 1 );
592-
593568 std::vector<uint32_t > params = {
594- (uint32_t ) (src0_misalignment / ggml_type_size (src0->type )),
595- (uint32_t ) (src1_misalignment / ggml_type_size (src1->type )),
596- (uint32_t ) (dst_misalignment / ggml_type_size (dst->type )),
569+ (uint32_t ) (ggml_webgpu_tensor_misalignment (ctx, src0) / ggml_type_size (src0->type )),
570+ (uint32_t ) (ggml_webgpu_tensor_misalignment (ctx, src1) / ggml_type_size (src1->type )),
571+ (uint32_t ) (ggml_webgpu_tensor_misalignment (ctx, dst) / ggml_type_size (dst->type )),
597572 (uint32_t ) dst->ne [1 ], // number of rows in result (M)
598573 (uint32_t ) dst->ne [0 ], // number of columns in result (N)
599574 (uint32_t ) src0->ne [0 ], // number of columns in src0/src1 (K)
@@ -611,17 +586,17 @@ static void ggml_webgpu_mul_mat(webgpu_context & ctx, ggml_tensor * src0, ggml_t
611586
612587 std::vector<wgpu::BindGroupEntry> entries = {
613588 { .binding = 0 ,
614- .buffer = ggml_backend_webgpu_tensor_buf (src0),
615- .offset = ggml_backend_webgpu_tensor_offset ( src0),
616- .size = ggml_webgpu_binding_size (src0, src0_misalignment ) },
589+ .buffer = ggml_webgpu_tensor_buf (src0),
590+ .offset = ggml_webgpu_tensor_align_offset (ctx, src0),
591+ .size = ggml_webgpu_tensor_binding_size (ctx, src0 ) },
617592 { .binding = 1 ,
618- .buffer = ggml_backend_webgpu_tensor_buf (src1),
619- .offset = ggml_backend_webgpu_tensor_offset ( src1),
620- .size = ggml_webgpu_binding_size (src1, src1_misalignment ) },
593+ .buffer = ggml_webgpu_tensor_buf (src1),
594+ .offset = ggml_webgpu_tensor_align_offset (ctx, src1),
595+ .size = ggml_webgpu_tensor_binding_size (ctx, src1 ) },
621596 { .binding = 2 ,
622- .buffer = ggml_backend_webgpu_tensor_buf (dst),
623- .offset = ggml_backend_webgpu_tensor_offset ( dst),
624- .size = ggml_webgpu_binding_size (dst, dst_misalignment) }
597+ .buffer = ggml_webgpu_tensor_buf (dst),
598+ .offset = ggml_webgpu_tensor_align_offset (ctx, dst),
599+ .size = ggml_webgpu_tensor_binding_size (ctx, dst) }
625600 };
626601
627602 uint32_t wg_x =
0 commit comments