1515 * Copyright (c) 2007      Mellanox Technologies. All rights reserved. 
1616 * Copyright (c) 2010      IBM Corporation.  All rights reserved. 
1717 * Copyright (c) 2012-2015 NVIDIA Corporation.  All rights reserved. 
18-  * Copyright (c) 2015       Los Alamos National Security, LLC.  All rights 
18+  * Copyright (c) 2015-2016  Los Alamos National Security, LLC.  All rights 
1919 *                         reserved. 
2020 * 
2121 * $COPYRIGHT$ 
@@ -113,11 +113,11 @@ static inline bool mca_mpool_rgpusm_deregister_lru (mca_mpool_base_module_t *mpo
113113    mpool -> rcache -> rcache_delete (mpool -> rcache , old_reg );
114114
115115    /* Drop the rcache lock while we deregister the memory */ 
116-     OPAL_THREAD_UNLOCK (& mpool -> rcache -> lock );
116+     opal_mutex_unlock   (& mpool -> rcache -> lock );
117117    assert (old_reg -> ref_count  ==  0 );
118118    rc  =  mpool_rgpusm -> resources .deregister_mem (mpool_rgpusm -> resources .reg_data ,
119119                                                old_reg );
120-     OPAL_THREAD_LOCK (& mpool -> rcache -> lock );
120+     opal_mutex_lock   (& mpool -> rcache -> lock );
121121
122122    /* This introduces a potential leak of registrations if 
123123       the deregistration fails to occur as we no longer have 
@@ -242,7 +242,7 @@ int mca_mpool_rgpusm_register (mca_mpool_base_module_t *mpool, void *addr,
242242    }
243243
244244    /* Check to see if memory is registered and stored in the cache. */ 
245-     OPAL_THREAD_LOCK (& mpool -> rcache -> lock );
245+     opal_mutex_lock   (& mpool -> rcache -> lock );
246246    mpool -> rcache -> rcache_find (mpool -> rcache , addr , size , reg );
247247
248248    /* If *reg is not NULL, we have a registration.  Let us see if the 
@@ -306,7 +306,7 @@ int mca_mpool_rgpusm_register (mca_mpool_base_module_t *mpool, void *addr,
306306                                  (opal_list_item_t * )(* reg ));
307307        }
308308        (* reg )-> ref_count ++ ;
309-         OPAL_THREAD_UNLOCK (& mpool -> rcache -> lock );
309+         opal_mutex_unlock   (& mpool -> rcache -> lock );
310310        opal_output (-1 , "reg->ref_count=%d" , (int )(* reg )-> ref_count );
311311        opal_output_verbose (80 , mca_mpool_rgpusm_component .output ,
312312                           "RGPUSM: Found entry in cache addr=%p, size=%d" , addr , (int )size );
@@ -322,7 +322,7 @@ int mca_mpool_rgpusm_register (mca_mpool_base_module_t *mpool, void *addr,
322322
323323    item  =  opal_free_list_get  (& mpool_rgpusm -> reg_list );
324324    if (NULL  ==  item ) {
325-         OPAL_THREAD_UNLOCK (& mpool -> rcache -> lock );
325+         opal_mutex_unlock   (& mpool -> rcache -> lock );
326326        return  OPAL_ERR_OUT_OF_RESOURCE ;
327327    }
328328    rgpusm_reg  =  (mca_mpool_common_cuda_reg_t * )item ;
@@ -399,7 +399,7 @@ int mca_mpool_rgpusm_register (mca_mpool_base_module_t *mpool, void *addr,
399399    }
400400
401401    if (rc  !=  OPAL_SUCCESS ) {
402-         OPAL_THREAD_UNLOCK (& mpool -> rcache -> lock );
402+         opal_mutex_unlock   (& mpool -> rcache -> lock );
403403        opal_free_list_return  (& mpool_rgpusm -> reg_list , item );
404404        return  rc ;
405405    }
@@ -439,7 +439,7 @@ int mca_mpool_rgpusm_register (mca_mpool_base_module_t *mpool, void *addr,
439439    }
440440
441441    if (rc  !=  OPAL_SUCCESS ) {
442-         OPAL_THREAD_UNLOCK (& mpool -> rcache -> lock );
442+         opal_mutex_unlock   (& mpool -> rcache -> lock );
443443        opal_free_list_return  (& mpool_rgpusm -> reg_list , item );
444444        /* We cannot recover from this.  We can be here if the size of 
445445         * the cache is smaller than the amount of memory we are 
@@ -454,10 +454,8 @@ int mca_mpool_rgpusm_register (mca_mpool_base_module_t *mpool, void *addr,
454454
455455    rgpusm_reg -> base .ref_count ++ ;
456456    * reg  =  (mca_mpool_base_registration_t  * )rgpusm_reg ;
457-     OPAL_THREAD_UNLOCK (& mpool -> rcache -> lock );
457+     opal_mutex_unlock   (& mpool -> rcache -> lock );
458458
459-     /* Cleanup any vmas that we have deferred deletion on */ 
460-     mpool -> rcache -> rcache_clean (mpool -> rcache );
461459    return  OPAL_SUCCESS ;
462460}
463461
@@ -483,7 +481,7 @@ int mca_mpool_rgpusm_find(struct mca_mpool_base_module_t *mpool, void *addr,
483481    base  =  addr ;
484482    bound  =  base  +  size  -  1 ; /* To keep cache hits working correctly */ 
485483
486-     OPAL_THREAD_LOCK (& mpool -> rcache -> lock );
484+     opal_mutex_lock   (& mpool -> rcache -> lock );
487485    opal_output (-1 , "Looking for addr=%p, size=%d" , addr , (int )size );
488486    rc  =  mpool -> rcache -> rcache_find (mpool -> rcache , addr , size , reg );
489487    if (* reg  !=  NULL  &&  mca_mpool_rgpusm_component .leave_pinned ) {
@@ -495,12 +493,12 @@ int mca_mpool_rgpusm_find(struct mca_mpool_base_module_t *mpool, void *addr,
495493    } else  {
496494        mpool_rgpusm -> stat_cache_notfound ++ ;
497495    }
498-     OPAL_THREAD_UNLOCK (& mpool -> rcache -> lock );
496+     opal_mutex_unlock   (& mpool -> rcache -> lock );
499497
500498    return  rc ;
501499}
502500
503- static  inline  bool  registration_is_cachebale (mca_mpool_base_registration_t  * reg )
501+ static  inline  bool  registration_is_cacheable (mca_mpool_base_registration_t  * reg )
504502{
505503     return  !(reg -> flags  & 
506504             (MCA_MPOOL_FLAGS_CACHE_BYPASS  |
@@ -514,14 +512,14 @@ int mca_mpool_rgpusm_deregister(struct mca_mpool_base_module_t *mpool,
514512    int  rc  =  OPAL_SUCCESS ;
515513    assert (reg -> ref_count  >  0 );
516514
517-     OPAL_THREAD_LOCK (& mpool -> rcache -> lock );
515+     opal_mutex_lock   (& mpool -> rcache -> lock );
518516    reg -> ref_count -- ;
519517    opal_output (-1 , "Deregister: reg->ref_count=%d" , (int )reg -> ref_count );
520518    if (reg -> ref_count  >  0 ) {
521-         OPAL_THREAD_UNLOCK (& mpool -> rcache -> lock );
519+         opal_mutex_unlock   (& mpool -> rcache -> lock );
522520        return  OPAL_SUCCESS ;
523521    }
524-     if (mca_mpool_rgpusm_component .leave_pinned  &&  registration_is_cachebale (reg ))
522+     if (mca_mpool_rgpusm_component .leave_pinned  &&  registration_is_cacheable (reg ))
525523    {
526524        /* if leave_pinned is set don't deregister memory, but put it 
527525         * on LRU list for future use */ 
@@ -535,7 +533,7 @@ int mca_mpool_rgpusm_deregister(struct mca_mpool_base_module_t *mpool,
535533            mpool -> rcache -> rcache_delete (mpool -> rcache , reg );
536534
537535        /* Drop the rcache lock before deregistering the memory */ 
538-         OPAL_THREAD_UNLOCK (& mpool -> rcache -> lock );
536+         opal_mutex_unlock   (& mpool -> rcache -> lock );
539537
540538        {
541539             mca_mpool_rgpusm_module_t  * mpool_rgpusm  =  (mca_mpool_rgpusm_module_t  * )mpool ;
@@ -545,17 +543,14 @@ int mca_mpool_rgpusm_deregister(struct mca_mpool_base_module_t *mpool,
545543                                                         reg );
546544         }
547545
548-         OPAL_THREAD_LOCK (& mpool -> rcache -> lock );
546+         opal_mutex_lock   (& mpool -> rcache -> lock );
549547
550548        if (OPAL_SUCCESS  ==  rc ) {
551549            opal_free_list_return  (& mpool_rgpusm -> reg_list ,
552550                                   (opal_free_list_item_t * )reg );
553551        }
554552    }
555-     OPAL_THREAD_UNLOCK (& mpool -> rcache -> lock );
556- 
557-     /* Cleanup any vmas that we have deferred deletion on */ 
558-     mpool -> rcache -> rcache_clean (mpool -> rcache );
553+     opal_mutex_unlock  (& mpool -> rcache -> lock );
559554
560555    return  rc ;
561556}
@@ -572,7 +567,7 @@ int mca_mpool_rgpusm_deregister_no_lock(struct mca_mpool_base_module_t *mpool,
572567    if (reg -> ref_count  >  0 ) {
573568        return  OPAL_SUCCESS ;
574569    }
575-     if (mca_mpool_rgpusm_component .leave_pinned  &&  registration_is_cachebale (reg ))
570+     if (mca_mpool_rgpusm_component .leave_pinned  &&  registration_is_cacheable (reg ))
576571    {
577572        /* if leave_pinned is set don't deregister memory, but put it 
578573         * on LRU list for future use */ 
@@ -599,15 +594,28 @@ int mca_mpool_rgpusm_deregister_no_lock(struct mca_mpool_base_module_t *mpool,
599594    return  rc ;
600595}
601596
602- #define  RGPUSM_MPOOL_NREGS  100
597+ static  int  iterate_dereg_finalize  (mca_mpool_base_registration_t  * rgpusm_reg , void  * ctx )
598+ {
599+     mca_mpool_rgpusm_module_t  * mpool_rgpusm  =  (mca_mpool_rgpusm_module_t  * ) ctx ;
600+     /* Callback passed to rcache_iterate by mca_mpool_rgpusm_finalize; ctx is the rgpusm mpool module. Registrations owned by a different mpool are skipped. Always returns 0. */
601+     if  ((mca_mpool_base_module_t  * ) mpool_rgpusm  !=  rgpusm_reg -> mpool ) {
602+         return  0 ;
603+     }
604+     /* Unlink cacheable registrations from this module's LRU list. NOTE(review): assumes such an entry is currently on lru_list even when ref_count > 0 -- confirm. */
605+     if  (registration_is_cacheable  (rgpusm_reg )) {
606+         opal_list_remove_item  (& mpool_rgpusm -> lru_list , (opal_list_item_t  * ) rgpusm_reg );
607+     }
608+ 
609+     /* Force the reference count to 0 before deregistering; otherwise dereg will fail on an assert. The result of deregister_mem is discarded by the (void) cast. */ 
610+     rgpusm_reg -> ref_count  =  0 ;
611+     (void ) mpool_rgpusm -> resources .deregister_mem  (mpool_rgpusm -> resources .reg_data , rgpusm_reg );
612+ 
613+     return  0 ;
614+ }
603615
604616void  mca_mpool_rgpusm_finalize (struct  mca_mpool_base_module_t  * mpool )
605617{
606618    mca_mpool_rgpusm_module_t  * mpool_rgpusm  =  (mca_mpool_rgpusm_module_t * )mpool ;
607-     mca_mpool_base_registration_t  * reg ;
608-     mca_mpool_base_registration_t  * regs [RGPUSM_MPOOL_NREGS ];
609-     int  reg_cnt , i ;
610-     int  rc ;
611619
612620    /* Statistic */ 
613621    if (true ==  mca_mpool_rgpusm_component .print_stats ) {
@@ -619,49 +627,11 @@ void mca_mpool_rgpusm_finalize(struct mca_mpool_base_module_t *mpool)
619627                mpool_rgpusm -> stat_evicted );
620628    }
621629
622-     OPAL_THREAD_LOCK (& mpool -> rcache -> lock );
623-     do  {
624-         reg_cnt  =  mpool -> rcache -> rcache_find_all (mpool -> rcache , 0 , (size_t )-1 ,
625-                 regs , RGPUSM_MPOOL_NREGS );
626-         opal_output (-1 , "Registration size at finalize = %d" , reg_cnt );
627- 
628-         for (i  =  0 ; i  <  reg_cnt ; i ++ ) {
629-             reg  =  regs [i ];
630630
631-             if (reg -> ref_count ) {
632-                 reg -> ref_count  =  0 ; /* otherway dereg will fail on assert */ 
633-             } else  if  (mca_mpool_rgpusm_component .leave_pinned ) {
634-                 opal_list_remove_item (& mpool_rgpusm -> lru_list ,
635-                         (opal_list_item_t * )reg );
636-             }
637- 
638-             /* Remove from rcache first */ 
639-             mpool -> rcache -> rcache_delete (mpool -> rcache , reg );
640- 
641-             /* Drop lock before deregistering memory */ 
642-             OPAL_THREAD_UNLOCK (& mpool -> rcache -> lock );
643-             assert (reg -> ref_count  ==  0 );
644-             rc  =  mpool_rgpusm -> resources .deregister_mem (mpool_rgpusm -> resources .reg_data ,
645-                                                    reg );
646-             OPAL_THREAD_LOCK (& mpool -> rcache -> lock );
647- 
648-             if (rc  !=  OPAL_SUCCESS ) {
649-                 /* Potentially lose track of registrations 
650-                    do we have to put it back? */ 
651-                 continue ;
652-             }
653- 
654-             opal_free_list_return  (& mpool_rgpusm -> reg_list ,
655-                                    (opal_free_list_item_t  * ) reg );
656-         }
657-     } while (reg_cnt  ==  RGPUSM_MPOOL_NREGS );
631+     (void ) mpool -> rcache -> rcache_iterate  (mpool -> rcache , NULL , (size_t ) -1 ,
632+                                           iterate_dereg_finalize , (void  * ) mpool );
658633
659634    OBJ_DESTRUCT (& mpool_rgpusm -> lru_list );
660635    OBJ_DESTRUCT (& mpool_rgpusm -> reg_list );
661-     OPAL_THREAD_UNLOCK (& mpool -> rcache -> lock );
662- 
663-     /* Cleanup any vmas that we have deferred deletion on */ 
664-     mpool -> rcache -> rcache_clean (mpool -> rcache );
665- 
666636}
667637
0 commit comments