@@ -156,195 +156,196 @@ const ggml_cann_device_info& ggml_cann_info() {
156156 * This class manages a pool of CANN buffers for a specific device. 
157157 */  
158158struct  ggml_cann_pool_buf_prio  : public  ggml_cann_pool  {
159-         /* *
160-          * @brief The maximum reuse margin for a buffer. 
161-          */  
162-         static  const  size_t  max_reuse_margin = 1ull  << 22 ;  //  4MB
163- 
164-         /* *
165-          * @brief The minimum free margin for a buffer. 
166-          */  
167-         static  const  size_t  min_free_margin = 1ull  << 20 ;   //  1MB
168- 
169-         /* *
170-          * @brief The alignment for buffer allocation. 
171-          */  
172-         static  const  size_t  alignment = 128 ;
173- 
174-         /* *
175-          * @brief The device ID associated with this buffer pool. 
176-          */  
177-         int  device;
178- 
179-         /* *
180-          * @brief Whether to disable clean during buffer allocation. 
181-          */  
182-         bool  disable_clean = false ;
183- 
184-         /* *
185-          * @brief Structure representing a CANN buffer. 
186-          */  
187-         struct  ggml_cann_buffer  {
188-             void * ptr = nullptr ;  // /< Pointer to the buffer.
189-             size_t  size = 0 ;      // /< Size of the buffer.
190-             std::chrono::steady_clock::time_point last_used;  // /< Last used time.
191- 
192-             bool  operator >(const  ggml_cann_buffer& other) const  {
193-                 return  size > other.size ;
194-             }
195-         };
196- 
197-         /* *
198-          * @brief Array of CANN buffers in the pool. 
199-          */  
200-         std::unordered_map<void *, size_t > buffer_pool;
201-         std::priority_queue<ggml_cann_buffer,
202-                             std::vector<ggml_cann_buffer>,
203-                             std::greater<>> free_buffers ;
204- 
205-         /* *
206-          * @brief Total size of all buffers in the pool. 
207-          */  
208-         size_t  pool_size = 0 ;
209- 
210-         /* *
211-          * @brief Constructor to initialize the buffer pool for a specific device. 
212-          * 
213-          * @param device The device ID to associate with this buffer pool. 
214-          */  
215-         explicit  ggml_cann_pool_buf_prio (int  device) : device(device) {
216-             disable_clean = getenv (" GGML_CANN_DISABLE_BUF_POOL_CLEAN"  ) != nullptr ;
159+     /* *
160+      * @brief The maximum reuse margin for a buffer. 
161+      */  
162+     static  const  size_t  max_reuse_margin = 1ull  << 22 ;  //  4MB
163+ 
164+     /* *
165+      * @brief The minimum free margin for a buffer. 
166+      */  
167+     static  const  size_t  min_free_margin = 1ull  << 20 ;   //  1MB
168+ 
169+     /* *
170+      * @brief The alignment for buffer allocation. 
171+      */  
172+     static  const  size_t  alignment = 128 ;
173+ 
174+     /* *
175+      * @brief The device ID associated with this buffer pool. 
176+      */  
177+     int  device;
178+ 
179+     /* *
180+      * @brief Whether to disable clean during buffer allocation. 
181+      */  
182+     bool  disable_clean = false ;
183+ 
184+     /* *
185+      * @brief Structure representing a CANN buffer. 
186+      */  
187+     struct  ggml_cann_buffer  {
188+         void * ptr = nullptr ;  // /< Pointer to the buffer.
189+         size_t  size = 0 ;      // /< Size of the buffer.
190+         std::chrono::steady_clock::time_point last_used;  // /< Last used time.
191+ 
192+         bool  operator >(const  ggml_cann_buffer& other) const  {
193+             return  size > other.size ;
217194        }
195+     };
218196
219-         /* *
220-          * @brief Destructor to free all buffers in the pool. 
221-          */  
222-         ~ggml_cann_pool_buf_prio () {
223-             ggml_cann_set_device (device);
224-             for  (auto & [b_ptr, b_size] : buffer_pool) {
225-                 aclrtFree (b_ptr);
226-                pool_size -= b_size;
227-             }
228-             buffer_pool.clear ();
229-             GGML_ASSERT (pool_size == 0 );
197+     /* *
198+      * @brief Array of CANN buffers in the pool. 
199+      */  
200+     std::unordered_map<void *, size_t > buffer_pool;
201+     std::priority_queue<ggml_cann_buffer,
202+                         std::vector<ggml_cann_buffer>,
203+                         std::greater<>> free_buffers ;
204+ 
205+     /* *
206+      * @brief Total size of all buffers in the pool. 
207+      */  
208+     size_t  pool_size = 0 ;
209+ 
210+     /* *
211+      * @brief Constructor to initialize the buffer pool for a specific device. 
212+      * 
213+      * @param device The device ID to associate with this buffer pool. 
214+      */  
215+     explicit  ggml_cann_pool_buf_prio (int  device) : device(device) {
216+         disable_clean = getenv (" GGML_CANN_DISABLE_BUF_POOL_CLEAN"  ) != nullptr ;
217+     }
218+ 
219+     /* *
220+      * @brief Destructor to free all buffers in the pool. 
221+      */  
222+     ~ggml_cann_pool_buf_prio () {
223+         ggml_cann_set_device (device);
224+         for  (auto & [b_ptr, b_size] : buffer_pool) {
225+             aclrtFree (b_ptr);
226+             pool_size -= b_size;
230227        }
228+         buffer_pool.clear ();
229+         GGML_ASSERT (pool_size == 0 );
230+     }
231231
232-          /* *
233-           * @brief Allocate a buffer of the given size. 
234-           * 
235-           * @param size The size of the buffer to allocate. 
236-           * @param actual_size A pointer to a variable to receive the actual size of 
237-           * the allocated buffer. 
238-           * @return A pointer to the allocated buffer. 
239-           */  
240-          void * alloc (size_t  size, size_t * actual_size) override  {
241-              size = GGML_PAD (size, alignment);
242-              if  (size == 0 ) {
243-                  size = alignment;
244-              }
232+     /* *
233+      * @brief Allocate a buffer of the given size. 
234+      * 
235+      * @param size The size of the buffer to allocate. 
236+      * @param actual_size A pointer to a variable to receive the actual size of 
237+      * the allocated buffer. 
238+      * @return A pointer to the allocated buffer. 
239+      */  
240+     void * alloc (size_t  size, size_t * actual_size) override  {
241+         size = GGML_PAD (size, alignment);
242+         if  (size == 0 ) {
243+             size = alignment;
244+         }
245245
246-             void * ptr = nullptr ;
247-             auto  now = std::chrono::steady_clock::now ();
248- 
249-             std::vector<ggml_cann_buffer> free_buffers_rest;
250-             free_buffers_rest.reserve (free_buffers.size ());
251-             while  (!free_buffers.empty ()) {
252-                 auto  b = free_buffers.top ();
253-                 free_buffers.pop ();
254- 
255-                 if  (b.size  >= size) {
256-                     //  reuse the buffer if the size is enough
257-                     const  size_t  margin = b.size  - size;
258-                     if  (margin <= max_reuse_margin) {
259-                         *actual_size = b.size ;
260-                         ptr = b.ptr ;
261-     #ifdef  DEBUG_CANN_MALLOC
262-                         GGML_LOG_INFO (
263-                             " cann pool[%d]: reused   %p, " 
264-                             " pool_size = %5u MB, " 
265-                             " size = %5u MB, " 
266-                             " margin = %5u MB\n "  ,
267-                             device, b.ptr ,
268-                             (uint32_t )(GGML_PAD (pool_size, 1048576 ) / 1048576 ),
269-                             (uint32_t )(GGML_PAD (size, 1048576 ) / 1048576 ),
270-                             (uint32_t )(GGML_PAD (margin, 1048576 ) / 1048576 ));
271-     #endif 
272-                         break ;
273-                     }
274-                 }
246+         void * ptr = nullptr ;
247+         auto  now = std::chrono::steady_clock::now ();
248+ 
249+         std::vector<ggml_cann_buffer> free_buffers_rest;
250+         free_buffers_rest.reserve (free_buffers.size ());
251+         while  (!free_buffers.empty ()) {
252+             auto  b = free_buffers.top ();
253+             free_buffers.pop ();
275254
276-                 bool  should_clean = !disable_clean &&
277-                                    b.size  > min_free_margin &&
278-                                    std::chrono::duration_cast<std::chrono::milliseconds>(now - b.last_used ).count () > 100 ;
279-                 if  (should_clean) {
280-                     //  free the buffer if the size is needed to be freed
281-                     ACL_CHECK (aclrtFree (b.ptr ));
282-                     pool_size -= b.size ;
283-                     buffer_pool.erase (b.ptr );
284-     #ifdef  DEBUG_CANN_MALLOC
255+             if  (b.size  >= size) {
256+                 //  reuse the buffer if the size is enough
257+                 const  size_t  margin = b.size  - size;
258+                 if  (margin <= max_reuse_margin) {
259+                     *actual_size = b.size ;
260+                     ptr = b.ptr ;
261+ #ifdef  DEBUG_CANN_MALLOC
285262                    GGML_LOG_INFO (
286-                         " cann pool[%d]: clean     %p, " 
263+                         " cann pool[%d]: reused    %p, " 
287264                        " pool_size = %5u MB, " 
288-                         " size = %5u MB\n "  ,
265+                         " size = %5u MB, " 
266+                         " margin = %5u MB\n "  ,
289267                        device, b.ptr ,
290268                        (uint32_t )(GGML_PAD (pool_size, 1048576 ) / 1048576 ),
291-                         (uint32_t )(GGML_PAD (b.size , 1048576 ) / 1048576 ));
292-     #endif 
293-                     continue ;
269+                         (uint32_t )(GGML_PAD (size, 1048576 ) / 1048576 ),
270+                         (uint32_t )(GGML_PAD (margin, 1048576 ) / 1048576 ));
271+ #endif 
272+                     break ;
294273                }
295-                 free_buffers_rest.push_back (b);
296-             }
297-             for  (ggml_cann_buffer &b : free_buffers_rest) {
298-                 free_buffers.push (std::move (b));
299274            }
300275
301-     #ifdef  DEBUG_CANN_MALLOC
302-             GGML_LOG_INFO (" cann pool[%d] free pool_size = %5u MB\n\n "  , device, (uint32_t )(GGML_PAD (pool_size, 1048576 ) / 1048576 ));
303-     #endif 
304-             if  (ptr != nullptr ) {
305-                 return  ptr;
276+             bool  should_clean = !disable_clean &&
277+                                 b.size  > min_free_margin &&
278+                                 std::chrono::duration_cast<std::chrono::milliseconds>(now - b.last_used ).count () > 100 ;
279+             if  (should_clean) {
280+                 //  free the buffer if the size is needed to be freed
281+                 ACL_CHECK (aclrtFree (b.ptr ));
282+                 pool_size -= b.size ;
283+                 buffer_pool.erase (b.ptr );
284+ #ifdef  DEBUG_CANN_MALLOC
285+                 GGML_LOG_INFO (
286+                     " cann pool[%d]: clean    %p, " 
287+                     " pool_size = %5u MB, " 
288+                     " size = %5u MB\n "  ,
289+                     device, b.ptr ,
290+                     (uint32_t )(GGML_PAD (pool_size, 1048576 ) / 1048576 ),
291+                     (uint32_t )(GGML_PAD (b.size , 1048576 ) / 1048576 ));
292+ #endif 
293+                 continue ;
306294            }
295+             free_buffers_rest.push_back (b);
296+         }
297+         for  (ggml_cann_buffer &b : free_buffers_rest) {
298+             free_buffers.push (std::move (b));
299+         }
307300
308-             //  allocate a new buffer if no buffer can be reused
309-             ggml_cann_set_device (device);
310-             ACL_CHECK (aclrtMalloc (&ptr, size, ACL_MEM_MALLOC_HUGE_FIRST));
311-             *actual_size = size;
312-             pool_size += size;
313-     #ifdef  DEBUG_CANN_MALLOC
314-             GGML_LOG_INFO (
315-                 " cann pool[%d]: allocate %p, " 
316-                 " pool_size = %5u MB, " 
317-                 " size = %5u MB\n "  ,
318-                 device, ptr, (uint32_t )(GGML_PAD (pool_size, 1048576 ) / 1048576 ),
319-                 (uint32_t )(GGML_PAD (size, 1048576 ) / 1048576 ));
320-     #endif 
321-             buffer_pool.emplace (ptr, size);
301+ #ifdef  DEBUG_CANN_MALLOC
302+         GGML_LOG_INFO (" cann pool[%d] free pool_size = %5u MB\n\n "  , device, (uint32_t )(GGML_PAD (pool_size, 1048576 ) / 1048576 ));
303+ #endif 
304+         if  (ptr != nullptr ) {
322305            return  ptr;
323306        }
324307
325-         /* *
326-          * @brief Free a buffer and return it to the pool. 
327-          * 
328-          * @param ptr Pointer to the buffer to free. 
329-          * @param size Size of the buffer to free. 
330-          */  
331-         void  free (void * ptr, size_t  size) override  {
332-             auto  it = buffer_pool.find (ptr);
333-             if  (it == buffer_pool.end ()) {
334-                 GGML_ABORT (" cann pool[%d]: buffer %p not found in pool\n "  , device, ptr);
335-             }
308+         //  allocate a new buffer if no buffer can be reused
309+         ggml_cann_set_device (device);
310+         ACL_CHECK (aclrtMalloc (&ptr, size, ACL_MEM_MALLOC_HUGE_FIRST));
311+         *actual_size = size;
312+         pool_size += size;
313+ #ifdef  DEBUG_CANN_MALLOC
314+         GGML_LOG_INFO (
315+             " cann pool[%d]: allocate %p, " 
316+             " pool_size = %5u MB, " 
317+             " size = %5u MB\n "  ,
318+             device, ptr, (uint32_t )(GGML_PAD (pool_size, 1048576 ) / 1048576 ),
319+             (uint32_t )(GGML_PAD (size, 1048576 ) / 1048576 ));
320+ #endif 
321+         buffer_pool.emplace (ptr, size);
322+         return  ptr;
323+     }
336324
337-             auto  now = std::chrono::steady_clock::now ();
338-             free_buffers.emplace (ggml_cann_buffer{ptr, it->second , now});
339-     #ifdef  DEBUG_CANN_MALLOC
340-             GGML_LOG_INFO (
341-                 " cann pool[%d]: return   %p, " 
342-                 " pool_size = %5u MB\n "  ,
343-                 device, ptr,
344-                 (uint32_t )(GGML_PAD (pool_size, 1048576 ) / 1048576 ));
345-     #endif 
325+     /* *
326+      * @brief Free a buffer and return it to the pool. 
327+      * 
328+      * @param ptr Pointer to the buffer to free. 
329+      * @param size Size of the buffer to free. 
330+      */  
331+     void  free (void * ptr, size_t  size) override  {
332+         GGML_UNUSED (size);
333+         auto  it = buffer_pool.find (ptr);
334+         if  (it == buffer_pool.end ()) {
335+             GGML_ABORT (" cann pool[%d]: buffer %p not found in pool\n "  , device, ptr);
346336        }
347-     };
337+ 
338+         auto  now = std::chrono::steady_clock::now ();
339+         free_buffers.emplace (ggml_cann_buffer{ptr, it->second , now});
340+ #ifdef  DEBUG_CANN_MALLOC
341+         GGML_LOG_INFO (
342+             " cann pool[%d]: return   %p, " 
343+             " pool_size = %5u MB\n "  ,
344+             device, ptr,
345+             (uint32_t )(GGML_PAD (pool_size, 1048576 ) / 1048576 ));
346+ #endif 
347+     }
348+ };
348349
349350/* *
350351 * @brief A pool of CANN buffers(segment buffer). 
@@ -531,6 +532,7 @@ struct ggml_cann_pool_buf : public ggml_cann_pool {
531532     * @param size Size of the buffer to free. 
532533     */  
533534    void  free (void * ptr, size_t  size) override  {
535+         GGML_UNUSED (size);
534536        for  (int  i = 0 ; i < MAX_BUFFERS; ++i) {
535537            ggml_cann_buffer& b = buffer_pool[i];
536538            if  (b.ptr  != ptr) {
0 commit comments