@@ -156,196 +156,196 @@ const ggml_cann_device_info& ggml_cann_info() {
156156 * This class manages a pool of CANN buffers for a specific device.
157157 */
158158struct ggml_cann_pool_buf_prio : public ggml_cann_pool {
159- /* *
160- * @brief The maximum reuse margin for a buffer.
161- */
162- static const size_t max_reuse_margin = 1ull << 22 ; // 4MB
163-
164- /* *
165- * @brief The minimum free margin for a buffer.
166- */
167- static const size_t min_free_margin = 1ull << 20 ; // 1MB
168-
169- /* *
170- * @brief The alignment for buffer allocation.
171- */
172- static const size_t alignment = 128 ;
173-
174- /* *
175- * @brief The device ID associated with this buffer pool.
176- */
177- int device;
178-
179- /* *
180- * @brief Whether to disable clean during buffer allocation.
181- */
182- bool disable_clean = false ;
183-
184- /* *
185- * @brief Structure representing a CANN buffer.
186- */
187- struct ggml_cann_buffer {
188- void * ptr = nullptr ; // /< Pointer to the buffer.
189- size_t size = 0 ; // /< Size of the buffer.
190- std::chrono::steady_clock::time_point last_used; // /< Last used time.
191-
192- bool operator >(const ggml_cann_buffer& other) const {
193- return size > other.size ;
194- }
195- };
196-
197- /* *
198- * @brief Array of CANN buffers in the pool.
199- */
200- std::unordered_map<void *, size_t > buffer_pool;
201- std::priority_queue<ggml_cann_buffer,
202- std::vector<ggml_cann_buffer>,
203- std::greater<>> free_buffers ;
204-
205- /* *
206- * @brief Total size of all buffers in the pool.
207- */
208- size_t pool_size = 0 ;
209-
210- /* *
211- * @brief Constructor to initialize the buffer pool for a specific device.
212- *
213- * @param device The device ID to associate with this buffer pool.
214- */
215- explicit ggml_cann_pool_buf_prio (int device) : device(device) {
216- disable_clean = getenv (" GGML_CANN_DISABLE_BUF_POOL_CLEAN" ) != nullptr ;
159+ /* *
160+ * @brief The maximum reuse margin for a buffer.
161+ */
162+ static const size_t max_reuse_margin = 1ull << 22 ; // 4MB
163+
164+ /* *
165+ * @brief The minimum free margin for a buffer.
166+ */
167+ static const size_t min_free_margin = 1ull << 20 ; // 1MB
168+
169+ /* *
170+ * @brief The alignment for buffer allocation.
171+ */
172+ static const size_t alignment = 128 ;
173+
174+ /* *
175+ * @brief The device ID associated with this buffer pool.
176+ */
177+ int device;
178+
179+ /* *
180+ * @brief Whether to disable clean during buffer allocation.
181+ */
182+ bool disable_clean = false ;
183+
184+ /* *
185+ * @brief Structure representing a CANN buffer.
186+ */
187+ struct ggml_cann_buffer {
188+ void * ptr = nullptr ; // /< Pointer to the buffer.
189+ size_t size = 0 ; // /< Size of the buffer.
190+ std::chrono::steady_clock::time_point last_used; // /< Last used time.
191+
192+ bool operator >(const ggml_cann_buffer& other) const {
193+ return size > other.size ;
217194 }
195+ };
218196
219- /* *
220- * @brief Destructor to free all buffers in the pool.
221- */
222- ~ggml_cann_pool_buf_prio () {
223- ggml_cann_set_device (device);
224- for (auto & [b_ptr, b_size] : buffer_pool) {
225- aclrtFree (b_ptr);
226- pool_size -= b_size;
227- }
228- buffer_pool.clear ();
229- GGML_ASSERT (pool_size == 0 );
197+ /* *
198+ * @brief Array of CANN buffers in the pool.
199+ */
200+ std::unordered_map<void *, size_t > buffer_pool;
201+ std::priority_queue<ggml_cann_buffer,
202+ std::vector<ggml_cann_buffer>,
203+ std::greater<>> free_buffers ;
204+
205+ /* *
206+ * @brief Total size of all buffers in the pool.
207+ */
208+ size_t pool_size = 0 ;
209+
210+ /* *
211+ * @brief Constructor to initialize the buffer pool for a specific device.
212+ *
213+ * @param device The device ID to associate with this buffer pool.
214+ */
215+ explicit ggml_cann_pool_buf_prio (int device) : device(device) {
216+ disable_clean = getenv (" GGML_CANN_DISABLE_BUF_POOL_CLEAN" ) != nullptr ;
217+ }
218+
219+ /* *
220+ * @brief Destructor to free all buffers in the pool.
221+ */
222+ ~ggml_cann_pool_buf_prio () {
223+ ggml_cann_set_device (device);
224+ for (auto & [b_ptr, b_size] : buffer_pool) {
225+ aclrtFree (b_ptr);
226+ pool_size -= b_size;
230227 }
228+ buffer_pool.clear ();
229+ GGML_ASSERT (pool_size == 0 );
230+ }
231231
232- /* *
233- * @brief Allocate a buffer of the given size.
234- *
235- * @param size The size of the buffer to allocate.
236- * @param actual_size A pointer to a variable to receive the actual size of
237- * the allocated buffer.
238- * @return A pointer to the allocated buffer.
239- */
240- void * alloc (size_t size, size_t * actual_size) override {
241- size = GGML_PAD (size, alignment);
242- if (size == 0 ) {
243- size = alignment;
244- }
232+ /* *
233+ * @brief Allocate a buffer of the given size.
234+ *
235+ * @param size The size of the buffer to allocate.
236+ * @param actual_size A pointer to a variable to receive the actual size of
237+ * the allocated buffer.
238+ * @return A pointer to the allocated buffer.
239+ */
240+ void * alloc (size_t size, size_t * actual_size) override {
241+ size = GGML_PAD (size, alignment);
242+ if (size == 0 ) {
243+ size = alignment;
244+ }
245245
246- void * ptr = nullptr ;
247- auto now = std::chrono::steady_clock::now ();
248-
249- std::vector<ggml_cann_buffer> free_buffers_rest;
250- free_buffers_rest.reserve (free_buffers.size ());
251- while (!free_buffers.empty ()) {
252- auto b = free_buffers.top ();
253- free_buffers.pop ();
254-
255- if (b.size >= size) {
256- // reuse the buffer if the size is enough
257- const size_t margin = b.size - size;
258- if (margin <= max_reuse_margin) {
259- *actual_size = b.size ;
260- ptr = b.ptr ;
261- #ifdef DEBUG_CANN_MALLOC
262- GGML_LOG_INFO (
263- " cann pool[%d]: reused %p, "
264- " pool_size = %5u MB, "
265- " size = %5u MB, "
266- " margin = %5u MB\n " ,
267- device, b.ptr ,
268- (uint32_t )(GGML_PAD (pool_size, 1048576 ) / 1048576 ),
269- (uint32_t )(GGML_PAD (size, 1048576 ) / 1048576 ),
270- (uint32_t )(GGML_PAD (margin, 1048576 ) / 1048576 ));
271- #endif
272- break ;
273- }
274- }
246+ void * ptr = nullptr ;
247+ auto now = std::chrono::steady_clock::now ();
248+
249+ std::vector<ggml_cann_buffer> free_buffers_rest;
250+ free_buffers_rest.reserve (free_buffers.size ());
251+ while (!free_buffers.empty ()) {
252+ auto b = free_buffers.top ();
253+ free_buffers.pop ();
275254
276- bool should_clean = !disable_clean &&
277- b.size > min_free_margin &&
278- std::chrono::duration_cast<std::chrono::milliseconds>(now - b.last_used ).count () > 100 ;
279- if (should_clean) {
280- // free the buffer if the size is needed to be freed
281- ACL_CHECK (aclrtFree (b.ptr ));
282- pool_size -= b.size ;
283- buffer_pool.erase (b.ptr );
284- #ifdef DEBUG_CANN_MALLOC
255+ if (b.size >= size) {
256+ // reuse the buffer if the size is enough
257+ const size_t margin = b.size - size;
258+ if (margin <= max_reuse_margin) {
259+ *actual_size = b.size ;
260+ ptr = b.ptr ;
261+ #ifdef DEBUG_CANN_MALLOC
285262 GGML_LOG_INFO (
286- " cann pool[%d]: clean %p, "
263+ " cann pool[%d]: reused %p, "
287264 " pool_size = %5u MB, "
288- " size = %5u MB\n " ,
265+ " size = %5u MB, "
266+ " margin = %5u MB\n " ,
289267 device, b.ptr ,
290268 (uint32_t )(GGML_PAD (pool_size, 1048576 ) / 1048576 ),
291- (uint32_t )(GGML_PAD (b.size , 1048576 ) / 1048576 ));
292- #endif
293- continue ;
269+ (uint32_t )(GGML_PAD (size, 1048576 ) / 1048576 ),
270+ (uint32_t )(GGML_PAD (margin, 1048576 ) / 1048576 ));
271+ #endif
272+ break ;
294273 }
295- free_buffers_rest.push_back (b);
296- }
297- for (ggml_cann_buffer &b : free_buffers_rest) {
298- free_buffers.push (std::move (b));
299274 }
300275
301- #ifdef DEBUG_CANN_MALLOC
302- GGML_LOG_INFO (" cann pool[%d] free pool_size = %5u MB\n\n " , device, (uint32_t )(GGML_PAD (pool_size, 1048576 ) / 1048576 ));
303- #endif
304- if (ptr != nullptr ) {
305- return ptr;
276+ bool should_clean = !disable_clean &&
277+ b.size > min_free_margin &&
278+ std::chrono::duration_cast<std::chrono::milliseconds>(now - b.last_used ).count () > 100 ;
279+ if (should_clean) {
280+ // free the buffer if the size is needed to be freed
281+ ACL_CHECK (aclrtFree (b.ptr ));
282+ pool_size -= b.size ;
283+ buffer_pool.erase (b.ptr );
284+ #ifdef DEBUG_CANN_MALLOC
285+ GGML_LOG_INFO (
286+ " cann pool[%d]: clean %p, "
287+ " pool_size = %5u MB, "
288+ " size = %5u MB\n " ,
289+ device, b.ptr ,
290+ (uint32_t )(GGML_PAD (pool_size, 1048576 ) / 1048576 ),
291+ (uint32_t )(GGML_PAD (b.size , 1048576 ) / 1048576 ));
292+ #endif
293+ continue ;
306294 }
295+ free_buffers_rest.push_back (b);
296+ }
297+ for (ggml_cann_buffer &b : free_buffers_rest) {
298+ free_buffers.push (std::move (b));
299+ }
307300
308- // allocate a new buffer if no buffer can be reused
309- ggml_cann_set_device (device);
310- ACL_CHECK (aclrtMalloc (&ptr, size, ACL_MEM_MALLOC_HUGE_FIRST));
311- *actual_size = size;
312- pool_size += size;
313- #ifdef DEBUG_CANN_MALLOC
314- GGML_LOG_INFO (
315- " cann pool[%d]: allocate %p, "
316- " pool_size = %5u MB, "
317- " size = %5u MB\n " ,
318- device, ptr, (uint32_t )(GGML_PAD (pool_size, 1048576 ) / 1048576 ),
319- (uint32_t )(GGML_PAD (size, 1048576 ) / 1048576 ));
320- #endif
321- buffer_pool.emplace (ptr, size);
301+ #ifdef DEBUG_CANN_MALLOC
302+ GGML_LOG_INFO (" cann pool[%d] free pool_size = %5u MB\n\n " , device, (uint32_t )(GGML_PAD (pool_size, 1048576 ) / 1048576 ));
303+ #endif
304+ if (ptr != nullptr ) {
322305 return ptr;
323306 }
324307
325- /* *
326- * @brief Free a buffer and return it to the pool.
327- *
328- * @param ptr Pointer to the buffer to free.
329- * @param size Size of the buffer to free.
330- */
331- void free (void * ptr, size_t size) override {
332- GGML_UNUSED (size);
333- auto it = buffer_pool.find (ptr);
334- if (it == buffer_pool.end ()) {
335- GGML_ABORT (" cann pool[%d]: buffer %p not found in pool\n " , device, ptr);
336- }
308+ // allocate a new buffer if no buffer can be reused
309+ ggml_cann_set_device (device);
310+ ACL_CHECK (aclrtMalloc (&ptr, size, ACL_MEM_MALLOC_HUGE_FIRST));
311+ *actual_size = size;
312+ pool_size += size;
313+ #ifdef DEBUG_CANN_MALLOC
314+ GGML_LOG_INFO (
315+ " cann pool[%d]: allocate %p, "
316+ " pool_size = %5u MB, "
317+ " size = %5u MB\n " ,
318+ device, ptr, (uint32_t )(GGML_PAD (pool_size, 1048576 ) / 1048576 ),
319+ (uint32_t )(GGML_PAD (size, 1048576 ) / 1048576 ));
320+ #endif
321+ buffer_pool.emplace (ptr, size);
322+ return ptr;
323+ }
337324
338- auto now = std::chrono::steady_clock::now ();
339- free_buffers.emplace (ggml_cann_buffer{ptr, it->second , now});
340- #ifdef DEBUG_CANN_MALLOC
341- GGML_LOG_INFO (
342- " cann pool[%d]: return %p, "
343- " pool_size = %5u MB\n " ,
344- device, ptr,
345- (uint32_t )(GGML_PAD (pool_size, 1048576 ) / 1048576 ));
346- #endif
325+ /* *
326+ * @brief Free a buffer and return it to the pool.
327+ *
328+ * @param ptr Pointer to the buffer to free.
329+ * @param size Size of the buffer to free.
330+ */
331+ void free (void * ptr, size_t size) override {
332+ GGML_UNUSED (size);
333+ auto it = buffer_pool.find (ptr);
334+ if (it == buffer_pool.end ()) {
335+ GGML_ABORT (" cann pool[%d]: buffer %p not found in pool\n " , device, ptr);
347336 }
348- };
337+
338+ auto now = std::chrono::steady_clock::now ();
339+ free_buffers.emplace (ggml_cann_buffer{ptr, it->second , now});
340+ #ifdef DEBUG_CANN_MALLOC
341+ GGML_LOG_INFO (
342+ " cann pool[%d]: return %p, "
343+ " pool_size = %5u MB\n " ,
344+ device, ptr,
345+ (uint32_t )(GGML_PAD (pool_size, 1048576 ) / 1048576 ));
346+ #endif
347+ }
348+ };
349349
350350/* *
351351 * @brief A pool of CANN buffers(segment buffer).
0 commit comments