@@ -532,6 +532,40 @@ T shift_sub_group_left(unsigned int member_mask,
532532 throw sycl::exception (sycl::errc::runtime, " Masked version of shift_sub_group_left not "
533533 " supported on host device." );
534534#endif // __SYCL_DEVICE_ONLY__
535+
536+ constexpr unsigned int MAX_BARRIER_ID = 16 ;
537+
538+ sycl::ext::oneapi::experimental::work_group_static<uint32_t [MAX_BARRIER_ID]>
539+ bar_counters;
540+
541+ void barrier_arrive_aligned (unsigned int barrier_id,
542+ unsigned int thread_count) {
543+ sycl::atomic_ref<uint32_t , sycl::memory_order::seq_cst,
544+ sycl::memory_scope::work_group>
545+ count_ref (bar_counters[barrier_id]);
546+ uint32_t _unused = 0 ;
547+ count_ref.compare_exchange_strong (_unused, thread_count);
548+ --count_ref;
549+ }
550+
551+ void barrier_sync_aligned (unsigned int barrier_id,
552+ unsigned int thread_count) {
553+ barrier_arrive_aligned (barrier_id, thread_count);
554+
555+ sycl::atomic_ref<uint32_t , sycl::memory_order::seq_cst,
556+ sycl::memory_scope::work_group>
557+ count_ref (bar_counters[barrier_id]);
558+
559+ auto it = sycl::ext::oneapi::this_work_item::get_nd_item<3 >();
560+ auto sg = it.get_sub_group ();
561+
562+ if (sg.leader ()) {
563+ while (count_ref.load (sycl::memory_order::seq_cst) != 0 )
564+ ;
565+ }
566+
567+ sycl::group_barrier (sg);
568+ }
535569}
536570
537571// / Masked version of shift_sub_group_right, which execute masked sub-group
0 commit comments