@@ -7,7 +7,9 @@
 
 #include "ggml-wgsl-shaders.hpp"
 
+#include <cstring>
 #include <iostream>
+#include <mutex>
 #include <vector>
 
 #ifdef GGML_WEBGPU_DEBUG
@@ -131,7 +133,7 @@ static void ggml_webgpu_create_buffer(wgpu::Device &device, wgpu::Buffer &buffer
     buffer_desc.size = size;
     buffer_desc.usage = usage;
     buffer_desc.label = label;
-    buffer_desc.mappedAtCreation = false;  
+    buffer_desc.mappedAtCreation = false;
     // TODO: error handling
     buffer = device.CreateBuffer(&buffer_desc);
 }
@@ -161,7 +163,7 @@ static void ggml_backend_webgpu_buffer_memset(webgpu_context ctx, wgpu::Buffer b
     uint32_t * params = (uint32_t *) ctx->memset_params_host_buf.GetMappedRange();
 
     params[0] = (uint32_t)offset;
-    params[1] = (uint32_t)size;  
+    params[1] = (uint32_t)size;
     params[2] = value;
     ctx->memset_params_host_buf.Unmap();
 
@@ -184,8 +186,8 @@ static void ggml_backend_webgpu_buffer_memset(webgpu_context ctx, wgpu::Buffer b
 
     wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
     encoder.CopyBufferToBuffer(
-        ctx->memset_params_host_buf, 0,  
-        ctx->memset_params_dev_buf, 0,  
+        ctx->memset_params_host_buf, 0,
+        ctx->memset_params_dev_buf, 0,
         ctx->memset_params_dev_buf.GetSize()
     );
     wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
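
The memset path above shows the backend's recurring parameter-upload pattern: params are written into a MapWrite | CopySrc host buffer, the buffer is unmapped, and a CopyBufferToBuffer transfers them into the Uniform | CopyDst device buffer before the compute pass is encoded. A minimal sketch of the pattern, assuming Dawn's webgpu_cpp API (buffer names are illustrative, and host_buf is assumed to be already mapped):

    uint32_t * params = (uint32_t *) host_buf.GetMappedRange();   // host_buf: MapWrite | CopySrc
    params[0] = 0;                                                // byte offset
    params[1] = 16;                                               // size
    params[2] = 0xDEADBEEF;                                       // fill value
    host_buf.Unmap();
    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
    encoder.CopyBufferToBuffer(host_buf, 0, dev_buf, 0, dev_buf.GetSize()); // dev_buf: Uniform | CopyDst
    // then BeginComputePass(), SetPipeline(), SetBindGroup(), DispatchWorkgroups(), Finish(), Submit()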
@@ -206,7 +208,7 @@ static void ggml_backend_webgpu_wait_on_submission(webgpu_context ctx) {
             if (status != wgpu::QueueWorkDoneStatus::Success) {
                 GGML_LOG_ERROR("ggml_webgpu: Failed to wait on queue: %s\n", message.data);
             }
-        }),  
+        }),
         UINT64_MAX
     );
 }
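
This helper turns WebGPU's asynchronous completion callback into a blocking wait: queue.OnSubmittedWorkDone() returns a wgpu::Future, and instance.WaitAny() blocks on it with no timeout. A sketch of the full helper under that assumption (the hunk shows only its tail; the signature here is illustrative):

    static void wait_on_submission(wgpu::Instance instance, wgpu::Queue queue) {
        instance.WaitAny(
            queue.OnSubmittedWorkDone(wgpu::CallbackMode::AllowSpontaneous,
                [](wgpu::QueueWorkDoneStatus status, wgpu::StringView message) {
                    if (status != wgpu::QueueWorkDoneStatus::Success) {
                        GGML_LOG_ERROR("ggml_webgpu: Failed to wait on queue: %s\n", message.data);
                    }
                }),
            UINT64_MAX); // block until the callback fires
    }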
@@ -243,7 +245,7 @@ static bool ggml_webgpu_encode_node(webgpu_context ctx, ggml_tensor * node){
         case GGML_OP_VIEW:
         case GGML_OP_PERMUTE:
             return false;
-        
+
         case GGML_OP_CPY: {
             std::lock_guard<std::mutex> lock(ctx->mutex);
             const ggml_tensor * src = node->src[0];
@@ -259,7 +261,7 @@ static bool ggml_webgpu_encode_node(webgpu_context ctx, ggml_tensor * node){
             dst_offset &= ~(ctx->limits.minStorageBufferOffsetAlignment - 1);
 
             wgpu::Device device = ctx->device;
-            ggml_backend_webgpu_map_buffer(ctx, ctx->cpy_params_host_buf,  
+            ggml_backend_webgpu_map_buffer(ctx, ctx->cpy_params_host_buf,
                 wgpu::MapMode::Write, 0, ctx->cpy_params_host_buf.GetSize());
             uint32_t * params = (uint32_t *) ctx->cpy_params_host_buf.GetMappedRange();
             uint32_t ne = (uint32_t)ggml_nelements(node);
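
The dst_offset &= ~(ctx->limits.minStorageBufferOffsetAlignment - 1) line above rounds a byte offset down to the device's storage-buffer binding alignment, which is always a power of two; the difference between the original and aligned offsets is what the shader then has to account for. A worked example:

    size_t align  = 256;                    // e.g. minStorageBufferOffsetAlignment
    size_t offset = 1000;                   // tensor's byte offset into its buffer
    size_t base   = offset & ~(align - 1);  // 768: usable as a binding offset
    size_t rem    = offset - base;          // 232: left for the shader to skip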
@@ -309,8 +311,8 @@ static bool ggml_webgpu_encode_node(webgpu_context ctx, ggml_tensor * node){
 
             wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
             encoder.CopyBufferToBuffer(
-                ctx->cpy_params_host_buf, 0,  
-                ctx->cpy_params_dev_buf, 0,  
+                ctx->cpy_params_host_buf, 0,
+                ctx->cpy_params_dev_buf, 0,
                 ctx->cpy_params_dev_buf.GetSize()
             );
             wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
@@ -343,7 +345,7 @@ static bool ggml_webgpu_encode_node(webgpu_context ctx, ggml_tensor * node){
             wgpu::Device device = ctx->device;
 
             // map the host parameters buffer
-            ggml_backend_webgpu_map_buffer(ctx, ctx->mul_mat_params_host_buf,  
+            ggml_backend_webgpu_map_buffer(ctx, ctx->mul_mat_params_host_buf,
                 wgpu::MapMode::Write, 0, ctx->mul_mat_params_host_buf.GetSize());
             uint32_t * params = (uint32_t *) ctx->mul_mat_params_host_buf.GetMappedRange();
 
@@ -371,7 +373,7 @@ static bool ggml_webgpu_encode_node(webgpu_context ctx, ggml_tensor * node){
             entries[0].offset = src0_offset;
             entries[0].size = ggml_nbytes(src0);
 
-            entries[1].binding = 1;  
+            entries[1].binding = 1;
             entries[1].buffer = src1_ctx->buffer;
             entries[1].offset = src1_offset;
             entries[1].size = ggml_nbytes(src1);
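
These wgpu::BindGroupEntry records are then assembled into a bind group for set 0 of the compute pass. A minimal sketch, assuming Dawn's webgpu_cpp API and that entries is a std::vector<wgpu::BindGroupEntry> (variable names are illustrative):

    wgpu::BindGroupDescriptor bg_desc;
    bg_desc.layout     = pipeline.GetBindGroupLayout(0); // layout inferred from the WGSL bindings
    bg_desc.entryCount = entries.size();
    bg_desc.entries    = entries.data();
    wgpu::BindGroup bind_group = device.CreateBindGroup(&bg_desc);
    // later: pass.SetBindGroup(0, bind_group);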
@@ -395,8 +397,8 @@ static bool ggml_webgpu_encode_node(webgpu_context ctx, ggml_tensor * node){
 
             wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
             encoder.CopyBufferToBuffer(
-                ctx->mul_mat_params_host_buf, 0,  
-                ctx->mul_mat_params_dev_buf, 0,  
+                ctx->mul_mat_params_host_buf, 0,
+                ctx->mul_mat_params_dev_buf, 0,
                 ctx->mul_mat_params_dev_buf.GetSize()
             );
             wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
@@ -417,7 +419,7 @@ static bool ggml_webgpu_encode_node(webgpu_context ctx, ggml_tensor * node){
             return false;
     }
 }
-  
+
 static ggml_status ggml_backend_webgpu_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
     WEBGPU_LOG_DEBUG("ggml_backend_webgpu_graph_compute(" << cgraph->n_nodes << " nodes)");
 
@@ -517,13 +519,13 @@ static void ggml_backend_webgpu_buffer_get_tensor(ggml_backend_buffer_t buffer,
 
     std::lock_guard<std::mutex> lock(webgpu_ctx->mutex);
 
-    if (webgpu_ctx->get_tensor_staging_buf == nullptr ||  
+    if (webgpu_ctx->get_tensor_staging_buf == nullptr ||
         webgpu_ctx->get_tensor_staging_buf.GetSize() < final_size) {
         // Create a new staging buffer if it doesn't exist or is too small
         if (webgpu_ctx->get_tensor_staging_buf) {
             webgpu_ctx->get_tensor_staging_buf.Destroy();
         }
-        ggml_webgpu_create_buffer(device, webgpu_ctx->get_tensor_staging_buf, final_size,  
+        ggml_webgpu_create_buffer(device, webgpu_ctx->get_tensor_staging_buf, final_size,
             wgpu::BufferUsage::CopyDst | wgpu::BufferUsage::MapRead, "get_tensor_staging_buf");
     }
 
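
The staging buffer exists because device-local storage buffers cannot be mapped directly: get_tensor copies into this CopyDst | MapRead buffer, waits for the copy, then maps it and memcpys out (the <cstring> include added at the top of this diff supplies memcpy). A hedged sketch of the readback tail, with illustrative names:

    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
    encoder.CopyBufferToBuffer(dev_buf, aligned_offset, staging_buf, 0, final_size);
    wgpu::CommandBuffer commands = encoder.Finish();
    queue.Submit(1, &commands);
    // map synchronously (MapAsync + instance.WaitAny), then copy out
    const void * mapped = staging_buf.GetConstMappedRange();
    std::memcpy(data, mapped, final_size);
    staging_buf.Unmap();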
@@ -577,7 +579,7 @@ static ggml_backend_buffer_t ggml_backend_webgpu_buffer_type_alloc_buffer(ggml_b
     ggml_backend_webgpu_device_context * ctx = static_cast<ggml_backend_webgpu_device_context *>(buft->device->context);
 
     wgpu::Buffer buf;
-    ggml_webgpu_create_buffer(ctx->webgpu_ctx->device, buf, size,  
+    ggml_webgpu_create_buffer(ctx->webgpu_ctx->device, buf, size,
         wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst, "allocated_buffer");
 
     ggml_backend_webgpu_buffer_context * buf_ctx = new ggml_backend_webgpu_buffer_context(ctx->webgpu_ctx, buf);
@@ -652,7 +654,7 @@ static void ggml_webgpu_init_memset_pipeline(webgpu_context webgpu_ctx) {
     constants[1].key = "bytes_per_thread";
     constants[1].value = webgpu_ctx->memset_bytes_per_thread;
     ggml_webgpu_create_pipeline(webgpu_ctx->device, webgpu_ctx->memset_pipeline, wgsl_memset, "memset", constants);
-    ggml_webgpu_create_buffer(webgpu_ctx->device, webgpu_ctx->memset_params_dev_buf,  
+    ggml_webgpu_create_buffer(webgpu_ctx->device, webgpu_ctx->memset_params_dev_buf,
         3 * sizeof(uint32_t), // 3 parameters: buffer size, offset, value
         wgpu::BufferUsage::Uniform | wgpu::BufferUsage::CopyDst, "memset_params_dev_buf");
     ggml_webgpu_create_buffer(webgpu_ctx->device, webgpu_ctx->memset_params_host_buf,
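
bytes_per_thread is a WGSL override constant, so the same memset shader can be specialized at pipeline-creation time instead of being recompiled from edited source. A minimal sketch of how such constants reach a compute pipeline, assuming Dawn's webgpu_cpp API (the create_pipeline helper's body is not part of this diff):

    // matches `override bytes_per_thread: u32;` declared in the WGSL
    std::vector<wgpu::ConstantEntry> constants(1);
    constants[0].key   = "bytes_per_thread";
    constants[0].value = 4;
    wgpu::ComputePipelineDescriptor desc;
    desc.compute.module        = shader_module; // compiled from the WGSL source
    desc.compute.entryPoint    = "main";
    desc.compute.constants     = constants.data();
    desc.compute.constantCount = constants.size();
    wgpu::ComputePipeline pipeline = device.CreateComputePipeline(&desc);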
@@ -679,7 +681,7 @@ static void ggml_webgpu_init_cpy_pipeline(webgpu_context webgpu_ctx) {
         wgpu::BufferUsage::MapWrite | wgpu::BufferUsage::CopySrc, "cpy_params_host_buf");
 }
 
-// TODO: Does this need to be thread safe? Is it only called once?
+// TODO: Make thread safe if multiple devices are used
 static ggml_backend_t ggml_backend_webgpu_device_init(ggml_backend_dev_t dev, const char * params) {
     GGML_UNUSED(params);
 
@@ -696,7 +698,7 @@ static ggml_backend_t ggml_backend_webgpu_device_init(ggml_backend_dev_t dev, co
         dev_desc.requiredLimits = &webgpu_ctx->limits;
         dev_desc.requiredFeatures = webgpu_ctx->features.features;
         dev_desc.requiredFeatureCount = webgpu_ctx->features.featureCount;
-        dev_desc.SetDeviceLostCallback(wgpu::CallbackMode::AllowSpontaneous,  
+        dev_desc.SetDeviceLostCallback(wgpu::CallbackMode::AllowSpontaneous,
             [](const wgpu::Device& device, wgpu::DeviceLostReason reason, wgpu::StringView message) {
                 GGML_UNUSED(device);
                 GGML_LOG_ERROR("ggml_webgpu: Device lost! Reason: %d, Message: %s\n", static_cast<int>(reason), message.data);
@@ -847,7 +849,7 @@ static ggml_backend_dev_t ggml_backend_webgpu_reg_get_device(ggml_backend_reg_t
     device_ctx.device_name = std::string(info.device.data);
     device_ctx.device_desc = std::string(info.description.data);
 
-    GGML_LOG_INFO("ggml_webgpu: adapter_info: vendor_id: %u | vendor: %s | architecture: %s | device_id: %u | name: %s | device_desc: %s\n",  
+    GGML_LOG_INFO("ggml_webgpu: adapter_info: vendor_id: %u | vendor: %s | architecture: %s | device_id: %u | name: %s | device_desc: %s\n",
         info.vendorID, info.vendor.data, info.architecture.data, info.deviceID, info.device.data, info.description.data);
 
     // See GGML Backend Device Interface section
@@ -902,4 +904,4 @@ ggml_backend_t ggml_backend_webgpu_init(void) {
     return ggml_backend_webgpu_device_init(dev, nullptr);
 }
 
-GGML_BACKEND_DL_IMPL(ggml_backend_webgpu_reg)
\ No newline at end of file
+GGML_BACKEND_DL_IMPL(ggml_backend_webgpu_reg)