@@ -56,14 +56,27 @@ class LazyReleaseCacheManager : public IBufferCacheManager {
5656 }
5757
5858 void ReleaseBuffer (WGPUBuffer buffer) override {
59- pending_buffers_.emplace_back (wgpu::Buffer::Acquire ( buffer) );
59+ pending_buffers_.emplace_back (buffer);
6060 }
6161
6262 void OnRefresh () override {
63+ Release ();
6364 pending_buffers_.clear ();
6465 }
6566
66- std::vector<wgpu::Buffer> pending_buffers_;
67+ public:
68+ ~LazyReleaseCacheManager () {
69+ Release ();
70+ }
71+
72+ protected:
73+ void Release () {
74+ for (auto & buffer : pending_buffers_) {
75+ wgpuBufferRelease (buffer);
76+ }
77+ }
78+
79+ std::vector<WGPUBuffer> pending_buffers_;
6780};
6881
6982class SimpleCacheManager : public IBufferCacheManager {
@@ -74,7 +87,7 @@ class SimpleCacheManager : public IBufferCacheManager {
7487 WGPUBuffer TryAcquireCachedBuffer (size_t buffer_size) override {
7588 auto it = buffers_.find (buffer_size);
7689 if (it != buffers_.end () && !it->second .empty ()) {
77- auto buffer = it->second .back (). MoveToCHandle () ;
90+ auto buffer = it->second .back ();
7891 it->second .pop_back ();
7992 return buffer;
8093 }
@@ -87,18 +100,31 @@ class SimpleCacheManager : public IBufferCacheManager {
87100 }
88101
89102 void ReleaseBuffer (WGPUBuffer buffer) override {
90- pending_buffers_.emplace_back (wgpu::Buffer::Acquire ( buffer) );
103+ pending_buffers_.emplace_back (buffer);
91104 }
92105
93106 void OnRefresh () override {
94107 for (auto & buffer : pending_buffers_) {
95- buffers_[static_cast <size_t >(buffer. GetSize ( ))].emplace_back (std::move ( buffer) );
108+ buffers_[static_cast <size_t >(wgpuBufferGetSize (buffer ))].emplace_back (buffer);
96109 }
97110 pending_buffers_.clear ();
98111 }
99112
100- std::map<size_t , std::vector<wgpu::Buffer>> buffers_;
101- std::vector<wgpu::Buffer> pending_buffers_;
113+ public:
114+ ~SimpleCacheManager () {
115+ for (auto & buffer : pending_buffers_) {
116+ wgpuBufferRelease (buffer);
117+ }
118+ for (auto & pair : buffers_) {
119+ for (auto & buffer : pair.second ) {
120+ wgpuBufferRelease (buffer);
121+ }
122+ }
123+ }
124+
125+ protected:
126+ std::map<size_t , std::vector<WGPUBuffer>> buffers_;
127+ std::vector<WGPUBuffer> pending_buffers_;
102128};
103129
104130// TODO: maybe use different bucket size for storage and uniform buffers?
@@ -155,7 +181,7 @@ class BucketCacheManager : public IBufferCacheManager {
155181 WGPUBuffer TryAcquireCachedBuffer (size_t buffer_size) override {
156182 auto it = buckets_.find (buffer_size);
157183 if (it != buckets_.end () && !it->second .empty ()) {
158- auto buffer = it->second .back (). MoveToCHandle () ;
184+ auto buffer = it->second .back ();
159185 it->second .pop_back ();
160186 return buffer;
161187 }
@@ -167,31 +193,44 @@ class BucketCacheManager : public IBufferCacheManager {
167193 }
168194
169195 void ReleaseBuffer (WGPUBuffer buffer) override {
170- pending_buffers_.emplace_back (wgpu::Buffer::Acquire ( buffer) );
196+ pending_buffers_.emplace_back (buffer);
171197 }
172198
173199 void OnRefresh () override {
174200 // TODO: consider graph capture. currently not supported
175201
176202 for (auto & buffer : pending_buffers_) {
177- auto buffer_size = static_cast <size_t >(buffer. GetSize ( ));
203+ auto buffer_size = static_cast <size_t >(wgpuBufferGetSize (buffer ));
178204
179205 auto it = buckets_.find (buffer_size);
180206 if (it != buckets_.end () && it->second .size () < buckets_limit_[buffer_size]) {
181- it->second .emplace_back (std::move (buffer));
207+ it->second .emplace_back (buffer);
208+ } else {
209+ wgpuBufferRelease (buffer);
182210 }
183211 }
184212
185213 pending_buffers_.clear ();
186214 }
187215
216+ ~BucketCacheManager () {
217+ for (auto & buffer : pending_buffers_) {
218+ wgpuBufferRelease (buffer);
219+ }
220+ for (auto & pair : buckets_) {
221+ for (auto & buffer : pair.second ) {
222+ wgpuBufferRelease (buffer);
223+ }
224+ }
225+ }
226+
188227 protected:
189228 void Initialize () {
190229 buckets_keys_.reserve (buckets_limit_.size ());
191230 buckets_.reserve (buckets_limit_.size ());
192231 for (const auto & pair : buckets_limit_) {
193232 buckets_keys_.push_back (pair.first );
194- buckets_.emplace (pair.first , std::vector<wgpu::Buffer >());
233+ buckets_.emplace (pair.first , std::vector<WGPUBuffer >());
195234 }
196235 std::sort (buckets_keys_.begin (), buckets_keys_.end ());
197236
@@ -205,8 +244,8 @@ class BucketCacheManager : public IBufferCacheManager {
205244#endif
206245 }
207246 std::unordered_map<size_t , size_t > buckets_limit_;
208- std::unordered_map<size_t , std::vector<wgpu::Buffer >> buckets_;
209- std::vector<wgpu::Buffer > pending_buffers_;
247+ std::unordered_map<size_t , std::vector<WGPUBuffer >> buckets_;
248+ std::vector<WGPUBuffer > pending_buffers_;
210249 std::vector<size_t > buckets_keys_;
211250};
212251
@@ -255,11 +294,10 @@ BufferManager::BufferManager(WebGpuContext& context, BufferCacheMode storage_buf
255294
256295void BufferManager::Upload (void * src, WGPUBuffer dst, size_t size) {
257296 // If the buffer is mapped, we can directly write to it.
258- wgpu::Buffer dst_buffer = dst;
259- auto mapped_data = dst_buffer.GetMappedRange ();
297+ void * mapped_data = wgpuBufferGetMappedRange (dst, 0 , WGPU_WHOLE_MAP_SIZE); // ensure the buffer is mapped
260298 if (mapped_data) {
261299 memcpy (mapped_data, src, size);
262- dst_buffer. Unmap ( );
300+ wgpuBufferUnmap (dst );
263301 return ;
264302 }
265303
@@ -288,17 +326,19 @@ void BufferManager::MemCpy(WGPUBuffer src, WGPUBuffer dst, size_t size) {
288326 EnforceBufferUnmapped (context_, dst);
289327
290328 auto buffer_size = NormalizeBufferSize (size);
291- ORT_ENFORCE (buffer_size <= wgpuBufferGetSize (src) && buffer_size <= wgpuBufferGetSize (dst),
329+ auto src_size = static_cast <size_t >(wgpuBufferGetSize (src));
330+ auto dst_size = static_cast <size_t >(wgpuBufferGetSize (dst));
331+ ORT_ENFORCE (buffer_size <= src_size && buffer_size <= dst_size,
292332 " Source and destination buffers must have enough space for the copy operation. src_size=" ,
293- wgpuBufferGetSize (src) , " , dst_size=" , wgpuBufferGetSize (dst) , " , copy_size=" , buffer_size, " ." );
333+ src_size , " , dst_size=" , dst_size , " , copy_size=" , buffer_size, " ." );
294334
295335 auto & command_encoder = context_.GetCommandEncoder ();
296336 context_.EndComputePass ();
297337 command_encoder.CopyBufferToBuffer (src, 0 , dst, 0 , buffer_size);
298338}
299339
300340WGPUBuffer BufferManager::Create (size_t size, wgpu::BufferUsage usage) {
301- auto & cache = GetCacheManager (static_cast <WGPUBufferUsage>( usage) );
341+ auto & cache = GetCacheManager (usage);
302342 auto buffer_size = cache.CalculateBufferSize (size);
303343
304344 auto buffer = cache.TryAcquireCachedBuffer (buffer_size);
@@ -310,7 +350,6 @@ WGPUBuffer BufferManager::Create(size_t size, wgpu::BufferUsage usage) {
310350 wgpu::BufferDescriptor desc{};
311351 desc.size = buffer_size;
312352 desc.usage = usage;
313- // desc.label = std::to_string(xx++).c_str();
314353 buffer = context_.Device ().CreateBuffer (&desc).MoveToCHandle ();
315354
316355 ORT_ENFORCE (buffer, " Failed to create GPU buffer: size=" , buffer_size, " , usage=" , uint64_t (usage), " ." );
@@ -320,14 +359,16 @@ WGPUBuffer BufferManager::Create(size_t size, wgpu::BufferUsage usage) {
320359}
321360
322361WGPUBuffer BufferManager::CreateUMA (size_t size, wgpu::BufferUsage usage) {
323- ORT_ENFORCE (usage & wgpu::BufferUsage::Storage, " UMA buffer must have storage usage ." );
324- auto & cache = GetCacheManager (static_cast <WGPUBufferUsage>( usage) );
362+ ORT_ENFORCE (usage & wgpu::BufferUsage::Storage, " UMA buffer must be a storage buffer ." );
363+ auto & cache = GetCacheManager (usage);
325364 auto buffer_size = cache.CalculateBufferSize (size);
326365
366+ // Ensure the buffer is mapped for writing at creation.
367+ usage |= wgpu::BufferUsage::MapWrite;
368+
327369 wgpu::BufferDescriptor desc{};
328370 desc.size = buffer_size;
329- // Ensure the buffer is mapped for writing at creation.
330- desc.usage = usage | wgpu::BufferUsage::MapWrite;
371+ desc.usage = usage;
331372 desc.mappedAtCreation = true ;
332373 auto buffer = context_.Device ().CreateBuffer (&desc).MoveToCHandle ();
333374
@@ -373,20 +414,21 @@ void BufferManager::RefreshPendingBuffers() {
373414 default_cache_->OnRefresh ();
374415}
375416
376- IBufferCacheManager& BufferManager::GetCacheManager (WGPUBufferUsage usage) const {
377- if (usage & WGPUBufferUsage_Storage ) {
417+ IBufferCacheManager& BufferManager::GetCacheManager (wgpu::BufferUsage usage) const {
418+ if (usage & wgpu::BufferUsage::Storage ) {
378419 return *storage_cache_;
379- } else if (usage & WGPUBufferUsage_Uniform ) {
420+ } else if (usage & wgpu::BufferUsage::Uniform ) {
380421 return *uniform_cache_;
381- } else if (usage & WGPUBufferUsage_QueryResolve ) {
422+ } else if (usage & wgpu::BufferUsage::QueryResolve ) {
382423 return *query_resolve_cache_;
383424 } else {
384425 return *default_cache_;
385426 }
386427}
387428
388429IBufferCacheManager& BufferManager::GetCacheManager (WGPUBuffer buffer) const {
389- return GetCacheManager (wgpuBufferGetUsage (buffer));
430+ auto usage = static_cast <wgpu::BufferUsage>(wgpuBufferGetUsage (buffer));
431+ return GetCacheManager (usage);
390432}
391433
392434std::unique_ptr<BufferManager> BufferManagerFactory::Create (WebGpuContext& context, BufferCacheMode storage_buffer_cache_mode, BufferCacheMode uniform_buffer_cache_mode, BufferCacheMode query_resolve_buffer_cache_mode) {
0 commit comments