 
 #include <assert.h>
 #include <stdbool.h>
+#include <stdatomic.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -269,6 +270,155 @@ void rv_debug(riscv_t *rv)
 HASH_FUNC_IMPL(map_hash, BLOCK_MAP_CAPACITY_BITS, 1 << BLOCK_MAP_CAPACITY_BITS)
 #endif
 
+#if RV32_HAS(T2C)
+/* Thread-local hazard pointer slot */
+static __thread int hp_slot = -1;
+static __thread hazard_pointer_t hp_local = {0};
+
+/* Global sequence counter for block validation */
+static _Atomic uint32_t global_seq_num = 1;
+
+/* Initialize hazard pointer domain */
+hp_domain_t *hp_domain_new(void)
+{
+    hp_domain_t *domain = calloc(1, sizeof(hp_domain_t));
+    if (!domain)
+        return NULL;
+
+    INIT_LIST_HEAD(&domain->retired_list);
+    pthread_mutex_init(&domain->retired_lock, NULL);
+    domain->retired_count = 0;
+
+    for (int i = 0; i < MAX_THREADS; i++)
+        atomic_init(&domain->pointers[i], NULL);
+
+    return domain;
+}
+
+/* Free hazard pointer domain */
+void hp_domain_free(hp_domain_t *domain)
+{
+    if (!domain)
+        return;
+
+    /* Free all retired blocks */
+    pthread_mutex_lock(&domain->retired_lock);
+    retired_node_t *node, *tmp;
+    list_for_each_entry_safe(node, tmp, &domain->retired_list, list) {
+        list_del(&node->list);
+        /* Free the block */
+        block_t *block = node->block;
+        for (rv_insn_t *ir = block->ir_head, *next_ir; ir; ir = next_ir) {
+            next_ir = ir->next;
+            free(ir->fuse);
+            /* Note: We can't use mpool_free here as rv is not available */
+        }
+        free(block);
+        free(node);
+    }
+    pthread_mutex_unlock(&domain->retired_lock);
+    pthread_mutex_destroy(&domain->retired_lock);
+    free(domain);
+}
+
+/* Acquire a hazard pointer slot for current thread */
+static int hp_acquire_slot(hp_domain_t *domain)
+{
+    if (hp_slot >= 0)
+        return hp_slot;
+
+    for (int i = 0; i < MAX_THREADS; i++) {
+        hazard_pointer_t *expected = NULL;
+        if (atomic_compare_exchange_strong_explicit(
+                &domain->pointers[i], &expected, &hp_local,
+                memory_order_release, memory_order_relaxed)) {
+            hp_slot = i;
+            return i;
+        }
+    }
+    return -1; /* No slots available */
+}
+
+/* Protect a pointer with hazard pointer */
+void hp_protect(hp_domain_t *domain, void *ptr)
+{
+    if (hp_slot < 0)
+        hp_slot = hp_acquire_slot(domain);
+
+    if (hp_slot >= 0)
+        hp_local.ptr = ptr;
+}
+
+/* Clear hazard pointer protection */
+void hp_clear(hp_domain_t *domain UNUSED)
+{
+    if (hp_slot >= 0)
+        hp_local.ptr = NULL;
+}
+
+/* Retire a block for later reclamation */
+static void hp_retire(riscv_t *rv, block_t *block)
+{
+    hp_domain_t *domain = rv->hp_domain;
+    if (!domain)
+        return;
+
+    retired_node_t *node = malloc(sizeof(retired_node_t));
+    if (!node)
+        return;
+
+    node->block = block;
+    INIT_LIST_HEAD(&node->list);
+
+    pthread_mutex_lock(&domain->retired_lock);
+    list_add(&node->list, &domain->retired_list);
+    domain->retired_count++;
+
+    /* Try to reclaim if we have enough retired blocks */
+    if (domain->retired_count >= HP_MAX_RETIRED) {
+        /* Collect all hazard pointers */
+        void *hazards[MAX_THREADS];
+        int hazard_count = 0;
+
+        for (int i = 0; i < MAX_THREADS; i++) {
+            hazard_pointer_t *hp = atomic_load_explicit(
+                &domain->pointers[i], memory_order_acquire);
+            if (hp && hp->ptr) {
+                hazards[hazard_count++] = hp->ptr;
+            }
+        }
+
+        /* Reclaim blocks not in hazard list */
+        retired_node_t *cur, *tmp;
+        list_for_each_entry_safe(cur, tmp, &domain->retired_list, list) {
+            bool hazardous = false;
+            for (int i = 0; i < hazard_count; i++) {
+                if (cur->block == hazards[i]) {
+                    hazardous = true;
+                    break;
+                }
+            }
+
+            if (!hazardous) {
+                list_del(&cur->list);
+                domain->retired_count--;
+
+                /* Free the block */
+                block_t *b = cur->block;
+                for (rv_insn_t *ir = b->ir_head, *next_ir; ir; ir = next_ir) {
+                    next_ir = ir->next;
+                    free(ir->fuse);
+                    mpool_free(rv->block_ir_mp, ir);
+                }
+                list_del_init(&b->list);
+                mpool_free(rv->block_mp, b);
+                free(cur);
+            }
+        }
+    }
+
+    pthread_mutex_unlock(&domain->retired_lock);
+}
+#endif
+
 /* allocate a basic block */
 static block_t *block_alloc(riscv_t *rv)
 {
@@ -278,12 +428,14 @@ static block_t *block_alloc(riscv_t *rv)
 #if RV32_HAS(JIT)
     block->translatable = true;
     block->hot = false;
-    block->hot2 = false;
+    atomic_init(&block->hot2, false);
     block->has_loops = false;
     block->n_invoke = 0;
     INIT_LIST_HEAD(&block->list);
 #if RV32_HAS(T2C)
-    block->compiled = false;
+    atomic_init(&block->compiled, false);
+    atomic_init(&block->func, NULL);
+    block->seq_num = atomic_fetch_add(&global_seq_num, 1);
 #endif
 #endif
     return block;
@@ -918,6 +1070,38 @@ static block_t *block_find_or_translate(riscv_t *rv)
         return next_blk;
     }
 
+#if RV32_HAS(T2C)
+    /* Never evict blocks with hot2 set or being compiled - they may be in use
+     * by another thread
+     */
+    if (atomic_load_explicit(&replaced_blk->hot2, memory_order_acquire) ||
+        atomic_load_explicit(&replaced_blk->compiled, memory_order_acquire)) {
+        /* This block has compiled T2C code - do not evict it.
+         * Try to find the block we actually need in the cache.
+         */
+        block_t *found = cache_get(rv->block_cache, rv->PC, false);
+        pthread_mutex_unlock(&rv->cache_lock);
+
+        if (found) {
+            /* Found the block we need - free the newly allocated one */
+            for (rv_insn_t *ir = next_blk->ir_head, *next_ir; ir;
+                 ir = next_ir) {
+                next_ir = ir->next;
+                free(ir->fuse);
+                mpool_free(rv->block_ir_mp, ir);
+            }
+            list_del_init(&next_blk->list);
+            mpool_free(rv->block_mp, next_blk);
+            return found;
+        }
+
+        /* If not found and the cache is full of hot2 blocks, return the new
+         * block even though it won't be cached. This prevents deadlock.
+         */
+        return next_blk;
+    }
+#endif
+
     if (prev == replaced_blk)
         prev = NULL;
 
@@ -930,37 +1114,40 @@ static block_t *block_find_or_translate(riscv_t *rv)
         rv_insn_t *taken = entry->ir_tail->branch_taken,
                   *untaken = entry->ir_tail->branch_untaken;
 
-        if (taken == replaced_blk_entry) {
+        if (taken == replaced_blk_entry)
             entry->ir_tail->branch_taken = NULL;
-        }
-        if (untaken == replaced_blk_entry) {
+        if (untaken == replaced_blk_entry)
             entry->ir_tail->branch_untaken = NULL;
-        }
 
         /* update JALR LUT */
-        if (!entry->ir_tail->branch_table) {
+        if (!entry->ir_tail->branch_table)
             continue;
-        }
 
-        /**
-         * TODO: upadate all JALR instructions which references to this
-         * basic block as the destination.
+        /* TODO: update all JALR instructions that reference this basic
+         * block as the destination.
          */
     }
 
-    /* free IRs in replaced block */
-    for (rv_insn_t *ir = replaced_blk->ir_head, *next_ir; ir != NULL;
-         ir = next_ir) {
-        next_ir = ir->next;
-
-        if (ir->fuse)
+#if RV32_HAS(T2C)
+    /* Use hazard pointers for safe reclamation of hot2 blocks */
+    if (atomic_load_explicit(&replaced_blk->hot2, memory_order_acquire) ||
+        atomic_load_explicit(&replaced_blk->compiled, memory_order_acquire)) {
+        /* Retire hot2 or being-compiled blocks for safe reclamation */
+        hp_retire(rv, replaced_blk);
+    } else {
+#endif
+        /* Free non-hot2 blocks immediately */
+        for (rv_insn_t *ir = replaced_blk->ir_head, *next_ir; ir; ir = next_ir) {
+            next_ir = ir->next;
             free(ir->fuse);
+            mpool_free(rv->block_ir_mp, ir);
+        }
 
-        mpool_free(rv->block_ir_mp, ir);
+        list_del_init(&replaced_blk->list);
+        mpool_free(rv->block_mp, replaced_blk);
+#if RV32_HAS(T2C)
     }
-
-    list_del_init(&replaced_blk->list);
-    mpool_free(rv->block_mp, replaced_blk);
+#endif
 #if RV32_HAS(T2C)
     pthread_mutex_unlock(&rv->cache_lock);
 #endif
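
The eviction path above retires any block whose hot2 or compiled flag is observed set and frees everything else immediately. Below is a minimal, self-contained sketch of that retire-and-scan pattern. The slot count, retire threshold, function names (protect, clear, retire, scan_and_reclaim), and the plain malloc'd objects are illustrative stand-ins rather than the emulator's block_t, hp_domain_t, or HP_MAX_RETIRED; only the reclamation protocol itself mirrors hp_retire().

#include <stdatomic.h>
#include <stdbool.h>
#include <stdlib.h>

#define SLOTS 8        /* one hazard slot per reader thread (illustrative) */
#define RETIRE_LIMIT 4 /* scan once this many objects have been retired */

static _Atomic(void *) slots[SLOTS];        /* published hazard pointers */
static void *retired[SLOTS + RETIRE_LIMIT]; /* objects awaiting reclamation */
static int n_retired;

/* Reader side: publish the object before dereferencing it. */
static void protect(int slot, void *obj)
{
    atomic_store_explicit(&slots[slot], obj, memory_order_seq_cst);
}

static void clear(int slot)
{
    atomic_store_explicit(&slots[slot], NULL, memory_order_release);
}

/* Reclaimer side: free only retired objects that no slot still points to. */
static void scan_and_reclaim(void)
{
    int kept = 0;
    for (int i = 0; i < n_retired; i++) {
        bool hazardous = false;
        for (int s = 0; s < SLOTS; s++) {
            if (atomic_load_explicit(&slots[s], memory_order_acquire) ==
                retired[i]) {
                hazardous = true;
                break;
            }
        }
        if (hazardous)
            retired[kept++] = retired[i]; /* still protected: keep it */
        else
            free(retired[i]); /* unlinked and unprotected: reclaim */
    }
    n_retired = kept;
}

static void retire(void *obj)
{
    retired[n_retired++] = obj;
    if (n_retired >= RETIRE_LIMIT)
        scan_and_reclaim();
}

int main(void)
{
    /* Single-threaded walkthrough of the protocol */
    void *obj = malloc(16);
    protect(0, obj);    /* a reader announces it is using obj */
    retire(obj);        /* the writer unlinks and retires it anyway */
    retire(malloc(16));
    retire(malloc(16));
    retire(malloc(16)); /* threshold reached: scan frees all but obj */
    clear(0);           /* the reader is done with obj */
    scan_and_reclaim(); /* now obj is reclaimed as well */
    return 0;
}

The same division of labor appears in the diff: rv_step() plays the reader (hp_protect/hp_clear around executing tier-2 code), and block_find_or_translate() plays the reclaimer (hp_retire plus the periodic scan).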
@@ -1121,18 +1308,38 @@ void rv_step(void *arg)
 #if RV32_HAS(JIT)
 #if RV32_HAS(T2C)
         /* executed through the tier-2 JIT compiler */
-        if (block->hot2) {
-            ((exec_t2c_func_t) block->func)(rv);
+        if (atomic_load_explicit(&block->hot2, memory_order_acquire)) {
+            /* Protect block with hazard pointer during execution */
+            hp_protect(rv->hp_domain, block);
+
+            /* Load function pointer with acquire ordering for
+             * synchronization */
+            exec_t2c_func_t func = (exec_t2c_func_t) atomic_load_explicit(
+                &block->func, memory_order_acquire);
+
+            /* Double-check hot2 flag is still set after loading function */
+            if (func &&
+                atomic_load_explicit(&block->hot2, memory_order_acquire)) {
+                /* Memory barrier to ensure we see the latest code */
+                atomic_thread_fence(memory_order_acquire);
+                func(rv);
+            }
+
+            /* Clear hazard pointer protection */
+            hp_clear(rv->hp_domain);
+
             prev = NULL;
             continue;
         } /* check if invoking times of t1 generated code exceed threshold */
-        else if (!block->compiled && block->n_invoke >= THRESHOLD) {
-            block->compiled = true;
+        else if (!atomic_load_explicit(&block->compiled,
+                                       memory_order_acquire) &&
+                 block->n_invoke >= THRESHOLD) {
+            /* Mark block as queued for compilation to avoid re-queueing */
+            atomic_store_explicit(&block->compiled, true, memory_order_release);
             queue_entry_t *entry = malloc(sizeof(queue_entry_t));
-            entry->block = block;
-            pthread_mutex_lock(&rv->wait_queue_lock);
-            list_add(&entry->list, &rv->wait_queue);
-            pthread_mutex_unlock(&rv->wait_queue_lock);
+            if (entry) {
+                entry->block = block;
+                pthread_mutex_lock(&rv->wait_queue_lock);
+                list_add(&entry->list, &rv->wait_queue);
+                pthread_cond_signal(&rv->wait_queue_cond);
+                pthread_mutex_unlock(&rv->wait_queue_lock);
+            }
         }
 #endif
         /* executed through the tier-1 JIT compiler */
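
The acquire loads in rv_step() are only half of the handshake; this diff does not show the T2C compile thread's publishing side. The sketch below is a hedged guess at that pairing, using stand-in types (demo_block_t, code_fn, publish, try_execute) rather than the emulator's block_t and exec_t2c_func_t: the worker is assumed to store the generated function with release ordering before flipping hot2, so any thread that observes hot2 as true also observes a non-NULL, fully written func.

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

typedef void (*code_fn)(void);

typedef struct {
    _Atomic(void *) func; /* generated code, published by the compile thread */
    _Atomic bool hot2;    /* "tier-2 code ready" flag */
} demo_block_t;

/* Assumed compile-thread side: write func first, then flip hot2, both with
 * release ordering, so a reader that sees hot2 == true also sees func. */
static void publish(demo_block_t *b, code_fn fn)
{
    /* The function-pointer/void* cast mirrors the cast already used in the
     * diff's consumer path. */
    atomic_store_explicit(&b->func, (void *) fn, memory_order_release);
    atomic_store_explicit(&b->hot2, true, memory_order_release);
}

/* Execution-thread side: mirrors the acquire loads in rv_step() above. */
static void try_execute(demo_block_t *b)
{
    if (!atomic_load_explicit(&b->hot2, memory_order_acquire))
        return;
    code_fn fn = (code_fn) atomic_load_explicit(&b->func, memory_order_acquire);
    if (fn)
        fn();
}

static void hello(void) {}

int main(void)
{
    demo_block_t b;
    atomic_init(&b.func, NULL);
    atomic_init(&b.hot2, false);
    try_execute(&b); /* not published yet: does nothing */
    publish(&b, hello);
    try_execute(&b); /* observes hot2, then calls the published function */
    return 0;
}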