1515 * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
1616 * Copyright (c) 2010 IBM Corporation. All rights reserved.
1717 * Copyright (c) 2012-2015 NVIDIA Corporation. All rights reserved.
18- * Copyright (c) 2015 Los Alamos National Security, LLC. All rights
18+ * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights
1919 * reserved.
2020 *
2121 * $COPYRIGHT$
@@ -113,11 +113,11 @@ static inline bool mca_mpool_rgpusm_deregister_lru (mca_mpool_base_module_t *mpo
113113 mpool -> rcache -> rcache_delete (mpool -> rcache , old_reg );
114114
115115 /* Drop the rcache lock while we deregister the memory */
116- OPAL_THREAD_UNLOCK (& mpool -> rcache -> lock );
116+ opal_mutex_unlock (& mpool -> rcache -> lock );
117117 assert (old_reg -> ref_count == 0 );
118118 rc = mpool_rgpusm -> resources .deregister_mem (mpool_rgpusm -> resources .reg_data ,
119119 old_reg );
120- OPAL_THREAD_LOCK (& mpool -> rcache -> lock );
120+ opal_mutex_lock (& mpool -> rcache -> lock );
121121
122122 /* This introduces a potential leak of registrations if
123123 the deregistration fails to occur as we no longer have
@@ -242,7 +242,7 @@ int mca_mpool_rgpusm_register (mca_mpool_base_module_t *mpool, void *addr,
242242 }
243243
244244 /* Check to see if memory is registered and stored in the cache. */
245- OPAL_THREAD_LOCK (& mpool -> rcache -> lock );
245+ opal_mutex_lock (& mpool -> rcache -> lock );
246246 mpool -> rcache -> rcache_find (mpool -> rcache , addr , size , reg );
247247
248248 /* If *reg is not NULL, we have a registration. Let us see if the
@@ -306,7 +306,7 @@ int mca_mpool_rgpusm_register (mca_mpool_base_module_t *mpool, void *addr,
306306 (opal_list_item_t * )(* reg ));
307307 }
308308 (* reg )-> ref_count ++ ;
309- OPAL_THREAD_UNLOCK (& mpool -> rcache -> lock );
309+ opal_mutex_unlock (& mpool -> rcache -> lock );
310310 opal_output (-1 , "reg->ref_count=%d" , (int )(* reg )-> ref_count );
311311 opal_output_verbose (80 , mca_mpool_rgpusm_component .output ,
312312 "RGPUSM: Found entry in cache addr=%p, size=%d" , addr , (int )size );
@@ -322,7 +322,7 @@ int mca_mpool_rgpusm_register (mca_mpool_base_module_t *mpool, void *addr,
322322
323323 item = opal_free_list_get (& mpool_rgpusm -> reg_list );
324324 if (NULL == item ) {
325- OPAL_THREAD_UNLOCK (& mpool -> rcache -> lock );
325+ opal_mutex_unlock (& mpool -> rcache -> lock );
326326 return OPAL_ERR_OUT_OF_RESOURCE ;
327327 }
328328 rgpusm_reg = (mca_mpool_common_cuda_reg_t * )item ;
@@ -399,7 +399,7 @@ int mca_mpool_rgpusm_register (mca_mpool_base_module_t *mpool, void *addr,
399399 }
400400
401401 if (rc != OPAL_SUCCESS ) {
402- OPAL_THREAD_UNLOCK (& mpool -> rcache -> lock );
402+ opal_mutex_unlock (& mpool -> rcache -> lock );
403403 opal_free_list_return (& mpool_rgpusm -> reg_list , item );
404404 return rc ;
405405 }
@@ -439,7 +439,7 @@ int mca_mpool_rgpusm_register (mca_mpool_base_module_t *mpool, void *addr,
439439 }
440440
441441 if (rc != OPAL_SUCCESS ) {
442- OPAL_THREAD_UNLOCK (& mpool -> rcache -> lock );
442+ opal_mutex_unlock (& mpool -> rcache -> lock );
443443 opal_free_list_return (& mpool_rgpusm -> reg_list , item );
444444 /* We cannot recover from this. We can be here if the size of
445445 * the cache is smaller than the amount of memory we are
@@ -454,10 +454,8 @@ int mca_mpool_rgpusm_register (mca_mpool_base_module_t *mpool, void *addr,
454454
455455 rgpusm_reg -> base .ref_count ++ ;
456456 * reg = (mca_mpool_base_registration_t * )rgpusm_reg ;
457- OPAL_THREAD_UNLOCK (& mpool -> rcache -> lock );
457+ opal_mutex_unlock (& mpool -> rcache -> lock );
458458
459- /* Cleanup any vmas that we have deferred deletion on */
460- mpool -> rcache -> rcache_clean (mpool -> rcache );
461459 return OPAL_SUCCESS ;
462460}
463461
@@ -483,7 +481,7 @@ int mca_mpool_rgpusm_find(struct mca_mpool_base_module_t *mpool, void *addr,
483481 base = addr ;
484482 bound = base + size - 1 ; /* To keep cache hits working correctly */
485483
486- OPAL_THREAD_LOCK (& mpool -> rcache -> lock );
484+ opal_mutex_lock (& mpool -> rcache -> lock );
487485 opal_output (-1 , "Looking for addr=%p, size=%d" , addr , (int )size );
488486 rc = mpool -> rcache -> rcache_find (mpool -> rcache , addr , size , reg );
489487 if (* reg != NULL && mca_mpool_rgpusm_component .leave_pinned ) {
@@ -495,12 +493,12 @@ int mca_mpool_rgpusm_find(struct mca_mpool_base_module_t *mpool, void *addr,
495493 } else {
496494 mpool_rgpusm -> stat_cache_notfound ++ ;
497495 }
498- OPAL_THREAD_UNLOCK (& mpool -> rcache -> lock );
496+ opal_mutex_unlock (& mpool -> rcache -> lock );
499497
500498 return rc ;
501499}
502500
503- static inline bool registration_is_cachebale (mca_mpool_base_registration_t * reg )
501+ static inline bool registration_is_cacheable (mca_mpool_base_registration_t * reg )
504502{
505503 return !(reg -> flags &
506504 (MCA_MPOOL_FLAGS_CACHE_BYPASS |
@@ -514,14 +512,14 @@ int mca_mpool_rgpusm_deregister(struct mca_mpool_base_module_t *mpool,
514512 int rc = OPAL_SUCCESS ;
515513 assert (reg -> ref_count > 0 );
516514
517- OPAL_THREAD_LOCK (& mpool -> rcache -> lock );
515+ opal_mutex_lock (& mpool -> rcache -> lock );
518516 reg -> ref_count -- ;
519517 opal_output (-1 , "Deregister: reg->ref_count=%d" , (int )reg -> ref_count );
520518 if (reg -> ref_count > 0 ) {
521- OPAL_THREAD_UNLOCK (& mpool -> rcache -> lock );
519+ opal_mutex_unlock (& mpool -> rcache -> lock );
522520 return OPAL_SUCCESS ;
523521 }
524- if (mca_mpool_rgpusm_component .leave_pinned && registration_is_cachebale (reg ))
522+ if (mca_mpool_rgpusm_component .leave_pinned && registration_is_cacheable (reg ))
525523 {
526524 /* if leave_pinned is set don't deregister memory, but put it
527525 * on LRU list for future use */
@@ -535,7 +533,7 @@ int mca_mpool_rgpusm_deregister(struct mca_mpool_base_module_t *mpool,
535533 mpool -> rcache -> rcache_delete (mpool -> rcache , reg );
536534
537535 /* Drop the rcache lock before deregistring the memory */
538- OPAL_THREAD_UNLOCK (& mpool -> rcache -> lock );
536+ opal_mutex_unlock (& mpool -> rcache -> lock );
539537
540538 {
541539 mca_mpool_rgpusm_module_t * mpool_rgpusm = (mca_mpool_rgpusm_module_t * )mpool ;
@@ -545,17 +543,14 @@ int mca_mpool_rgpusm_deregister(struct mca_mpool_base_module_t *mpool,
545543 reg );
546544 }
547545
548- OPAL_THREAD_LOCK (& mpool -> rcache -> lock );
546+ opal_mutex_lock (& mpool -> rcache -> lock );
549547
550548 if (OPAL_SUCCESS == rc ) {
551549 opal_free_list_return (& mpool_rgpusm -> reg_list ,
552550 (opal_free_list_item_t * )reg );
553551 }
554552 }
555- OPAL_THREAD_UNLOCK (& mpool -> rcache -> lock );
556-
557- /* Cleanup any vmas that we have deferred deletion on */
558- mpool -> rcache -> rcache_clean (mpool -> rcache );
553+ opal_mutex_unlock (& mpool -> rcache -> lock );
559554
560555 return rc ;
561556}
@@ -572,7 +567,7 @@ int mca_mpool_rgpusm_deregister_no_lock(struct mca_mpool_base_module_t *mpool,
572567 if (reg -> ref_count > 0 ) {
573568 return OPAL_SUCCESS ;
574569 }
575- if (mca_mpool_rgpusm_component .leave_pinned && registration_is_cachebale (reg ))
570+ if (mca_mpool_rgpusm_component .leave_pinned && registration_is_cacheable (reg ))
576571 {
577572 /* if leave_pinned is set don't deregister memory, but put it
578573 * on LRU list for future use */
@@ -599,15 +594,28 @@ int mca_mpool_rgpusm_deregister_no_lock(struct mca_mpool_base_module_t *mpool,
599594 return rc ;
600595}
601596
602- #define RGPUSM_MPOOL_NREGS 100
597+ static int iterate_dereg_finalize (mca_mpool_base_registration_t * rgpusm_reg , void * ctx )
598+ {
599+ mca_mpool_rgpusm_module_t * mpool_rgpusm = (mca_mpool_rgpusm_module_t * ) ctx ;
600+
601+ if ((mca_mpool_base_module_t * ) mpool_rgpusm != rgpusm_reg -> mpool ) {
602+ return 0 ;
603+ }
604+
605+ if (registration_is_cacheable (rgpusm_reg )) {
606+ opal_list_remove_item (& mpool_rgpusm -> lru_list , (opal_list_item_t * ) rgpusm_reg );
607+ }
608+
609+ /* set the reference count to 0 otherwise dereg will fail on assert */
610+ rgpusm_reg -> ref_count = 0 ;
611+ (void ) mpool_rgpusm -> resources .deregister_mem (mpool_rgpusm -> resources .reg_data , rgpusm_reg );
612+
613+ return 0 ;
614+ }
603615
604616void mca_mpool_rgpusm_finalize (struct mca_mpool_base_module_t * mpool )
605617{
606618 mca_mpool_rgpusm_module_t * mpool_rgpusm = (mca_mpool_rgpusm_module_t * )mpool ;
607- mca_mpool_base_registration_t * reg ;
608- mca_mpool_base_registration_t * regs [RGPUSM_MPOOL_NREGS ];
609- int reg_cnt , i ;
610- int rc ;
611619
612620 /* Statistic */
613621 if (true == mca_mpool_rgpusm_component .print_stats ) {
@@ -619,49 +627,11 @@ void mca_mpool_rgpusm_finalize(struct mca_mpool_base_module_t *mpool)
619627 mpool_rgpusm -> stat_evicted );
620628 }
621629
622- OPAL_THREAD_LOCK (& mpool -> rcache -> lock );
623- do {
624- reg_cnt = mpool -> rcache -> rcache_find_all (mpool -> rcache , 0 , (size_t )-1 ,
625- regs , RGPUSM_MPOOL_NREGS );
626- opal_output (-1 , "Registration size at finalize = %d" , reg_cnt );
627-
628- for (i = 0 ; i < reg_cnt ; i ++ ) {
629- reg = regs [i ];
630630
631- if (reg -> ref_count ) {
632- reg -> ref_count = 0 ; /* otherway dereg will fail on assert */
633- } else if (mca_mpool_rgpusm_component .leave_pinned ) {
634- opal_list_remove_item (& mpool_rgpusm -> lru_list ,
635- (opal_list_item_t * )reg );
636- }
637-
638- /* Remove from rcache first */
639- mpool -> rcache -> rcache_delete (mpool -> rcache , reg );
640-
641- /* Drop lock before deregistering memory */
642- OPAL_THREAD_UNLOCK (& mpool -> rcache -> lock );
643- assert (reg -> ref_count == 0 );
644- rc = mpool_rgpusm -> resources .deregister_mem (mpool_rgpusm -> resources .reg_data ,
645- reg );
646- OPAL_THREAD_LOCK (& mpool -> rcache -> lock );
647-
648- if (rc != OPAL_SUCCESS ) {
649- /* Potentially lose track of registrations
650- do we have to put it back? */
651- continue ;
652- }
653-
654- opal_free_list_return (& mpool_rgpusm -> reg_list ,
655- (opal_free_list_item_t * ) reg );
656- }
657- } while (reg_cnt == RGPUSM_MPOOL_NREGS );
631+ (void ) mpool -> rcache -> rcache_iterate (mpool -> rcache , NULL , (size_t ) -1 ,
632+ iterate_dereg_finalize , (void * ) mpool );
658633
659634 OBJ_DESTRUCT (& mpool_rgpusm -> lru_list );
660635 OBJ_DESTRUCT (& mpool_rgpusm -> reg_list );
661- OPAL_THREAD_UNLOCK (& mpool -> rcache -> lock );
662-
663- /* Cleanup any vmas that we have deferred deletion on */
664- mpool -> rcache -> rcache_clean (mpool -> rcache );
665-
666636}
667637
0 commit comments