@@ -144,9 +144,21 @@ inline __SYCL_GLOBAL__ RawShadow *MemToShadow(uptr addr, uint32_t as) {
144144 return shadow_ptr;
145145}
146146
147- inline Sid GetCurrentSid () {
148- const auto lid = __spirv_BuiltInGlobalLinearId;
149- return lid % kThreadSlotCount ;
147+ // We select up to 4 work items in each work group to do detection, and the
148+ // total number of selected work items is no more than kThreadSlotCount. This
149+ // may cause some false negative cases when a data race occurs in non-uniform
150+ // memory accesses. Since such cases are very rare and the change greatly
151+ // reduces runtime overhead, it should be worthwhile.
152+ inline int GetCurrentSid () {
153+ const size_t lid = LocalLinearId ();
154+ const size_t ThreadPerWorkGroup =
155+ Min (4 , __spirv_BuiltInWorkgroupSize.x * __spirv_BuiltInWorkgroupSize.y *
156+ __spirv_BuiltInWorkgroupSize.z );
157+ if (lid >= ThreadPerWorkGroup)
158+ return -1 ;
159+
160+ const size_t Id = lid + WorkGroupLinearId () * ThreadPerWorkGroup;
161+ return Id < kThreadSlotCount ? Id : -1 ;
150162}
151163
152164inline RawShadow LoadShadow (const __SYCL_GLOBAL__ RawShadow *p) {
@@ -315,7 +327,9 @@ inline bool ContainsSameAccess(__SYCL_GLOBAL__ RawShadow *s, Shadow cur,
315327 __SYCL_GLOBAL__ RawShadow *shadow_mem = MemToShadow (addr, as); \
316328 if (!shadow_mem) \
317329 return ; \
318- Sid sid = GetCurrentSid (); \
330+ int sid = GetCurrentSid (); \
331+ if (sid == -1 ) \
332+ return ; \
319333 uint16_t current_clock = IncrementEpoch (sid) + 1 ; \
320334 TSAN_DEBUG (__spirv_ocl_printf (__tsan_print_raw_shadow, (void *)addr, as, \
321335 (void *)shadow_mem, shadow_mem[0 ], \
@@ -360,7 +374,9 @@ __tsan_read16(uptr addr, uint32_t as, const char __SYCL_CONSTANT__ *file,
360374 __SYCL_GLOBAL__ RawShadow *shadow_mem = MemToShadow (addr, as); \
361375 if (!shadow_mem) \
362376 return ; \
363- Sid sid = GetCurrentSid (); \
377+ int sid = GetCurrentSid (); \
378+ if (sid == -1 ) \
379+ return ; \
364380 uint16_t current_clock = IncrementEpoch (sid) + 1 ; \
365381 AccessType type = is_write ? kAccessWrite : kAccessRead ; \
366382 uptr size1 = Min (size, RoundUpTo (addr + 1 , kShadowCell ) - addr); \
@@ -499,39 +515,47 @@ DEVICE_EXTERN_C_NOINLINE void __tsan_cleanup_dynamic_local(uptr ptr,
499515}
500516
501517DEVICE_EXTERN_C_INLINE void __tsan_device_barrier () {
502- Sid sid = GetCurrentSid ();
518+ int sid = GetCurrentSid ();
503519
504- // sync current thread clock to global state
505- TsanLaunchInfo->Clock [kThreadSlotCount ].clk_ [sid] =
506- TsanLaunchInfo->Clock [sid].clk_ [sid];
520+ if (sid != -1 ) {
521+ // sync current thread clock to global state
522+ TsanLaunchInfo->Clock [kThreadSlotCount ].clk_ [sid] =
523+ TsanLaunchInfo->Clock [sid].clk_ [sid];
524+ }
507525
508526 __spirv_ControlBarrier (__spv::Scope::Device, __spv::Scope::Device,
509527 __spv::MemorySemanticsMask::SequentiallyConsistent |
510528 __spv::MemorySemanticsMask::CrossWorkgroupMemory |
511529 __spv::MemorySemanticsMask::WorkgroupMemory);
512530
513- // sync global state back
514- for (uptr i = 0 ; i < kThreadSlotCount ; i++)
515- TsanLaunchInfo->Clock [sid].clk_ [i] =
516- TsanLaunchInfo->Clock [kThreadSlotCount ].clk_ [i];
531+ if (sid != -1 ) {
532+ // sync global state back
533+ for (uptr i = 0 ; i < kThreadSlotCount ; i++)
534+ TsanLaunchInfo->Clock [sid].clk_ [i] =
535+ TsanLaunchInfo->Clock [kThreadSlotCount ].clk_ [i];
536+ }
517537}
518538
519539DEVICE_EXTERN_C_INLINE void __tsan_group_barrier () {
520- Sid sid = GetCurrentSid ();
540+ int sid = GetCurrentSid ();
521541
522- // sync current thread clock to global state
523- TsanLaunchInfo->Clock [kThreadSlotCount ].clk_ [sid] =
524- TsanLaunchInfo->Clock [sid].clk_ [sid];
542+ if (sid != -1 ) {
543+ // sync current thread clock to global state
544+ TsanLaunchInfo->Clock [kThreadSlotCount ].clk_ [sid] =
545+ TsanLaunchInfo->Clock [sid].clk_ [sid];
546+ }
525547
526548 __spirv_ControlBarrier (__spv::Scope::Workgroup, __spv::Scope::Workgroup,
527549 __spv::MemorySemanticsMask::SequentiallyConsistent |
528550 __spv::MemorySemanticsMask::CrossWorkgroupMemory |
529551 __spv::MemorySemanticsMask::WorkgroupMemory);
530552
531- // sync global state back
532- for (uptr i = 0 ; i < kThreadSlotCount ; i++)
533- TsanLaunchInfo->Clock [sid].clk_ [i] =
534- TsanLaunchInfo->Clock [kThreadSlotCount ].clk_ [i];
553+ if (sid != -1 ) {
554+ // sync global state back
555+ for (uptr i = 0 ; i < kThreadSlotCount ; i++)
556+ TsanLaunchInfo->Clock [sid].clk_ [i] =
557+ TsanLaunchInfo->Clock [kThreadSlotCount ].clk_ [i];
558+ }
535559}
536560
537561#endif // __SPIR__ || __SPIRV__
0 commit comments