3131#ifdef USE_LOOP_UNROLLING
3232#define mix_and_shuffle (t, a, p, b, thread_id ) \
3333 offset[p] = fnv(init0[p] ^ (a + b), ((uint32_t *)&mix[p])[b]) % d_dag_size; \
34- offset[p] = item_ct1.get_sub_group().shuffle( offset[p], t + iShuffleOffset); \
34+ offset[p] = sycl::select_from_group( item_ct1.get_sub_group(), offset[p], t + iShuffleOffset); \
3535 mix[p] = fnv4(mix[p], d_dag[offset[p]].uint4s[thread_id]);
3636#endif
3737
@@ -48,9 +48,13 @@ DEV_INLINE bool compute_hash(uint64_t nonce, sycl::uint2 *mix_hash, sycl::nd_ite
4848 const int thread_id = item_ct1.get_local_id (0 ) & (THREADS_PER_HASH - 1 );
4949 const int mix_idx = thread_id & 3 ;
5050
51- int const iSubGroupThreadId (item_ct1.get_sub_group ().get_local_id ());
51+ auto g = item_ct1.get_sub_group ();
52+
53+ // /int const iSubGroupThreadId(item_ct1.get_sub_group().get_local_id());
54+ int const iSubGroupThreadId (g.get_local_id ());
5255 int const iShuffleOffset (pdShuffleOffsets[iSubGroupThreadId]);
5356
57+
5458#ifndef USE_LOOP_UNROLLING
5559
5660 for (int i = 0 ; i < THREADS_PER_HASH; i += _PARALLEL_HASH) {
@@ -61,29 +65,29 @@ DEV_INLINE bool compute_hash(uint64_t nonce, sycl::uint2 *mix_hash, sycl::nd_ite
6165 // share init among threads
6266 for (int p = 0 ; p < _PARALLEL_HASH; p++) {
6367 sycl::uint2 shuffle[8 ];
64- shuffle[0 ].x () = item_ct1. get_sub_group (). shuffle ( state[0 ].x (), i + p + iShuffleOffset);
65- shuffle[0 ].y () = item_ct1. get_sub_group (). shuffle ( state[0 ].y (), i + p + iShuffleOffset);
68+ shuffle[0 ].x () = sycl::select_from_group (g, state[0 ].x (), i + p + iShuffleOffset);
69+ shuffle[0 ].y () = sycl::select_from_group (g, state[0 ].y (), i + p + iShuffleOffset);
6670
67- shuffle[1 ].x () = item_ct1. get_sub_group (). shuffle ( state[1 ].x (), i + p + iShuffleOffset);
68- shuffle[1 ].y () = item_ct1. get_sub_group (). shuffle ( state[1 ].y (), i + p + iShuffleOffset);
71+ shuffle[1 ].x () = sycl::select_from_group (g, state[1 ].x (), i + p + iShuffleOffset);
72+ shuffle[1 ].y () = sycl::select_from_group (g, state[1 ].y (), i + p + iShuffleOffset);
6973
70- shuffle[2 ].x () = item_ct1. get_sub_group (). shuffle ( state[2 ].x (), i + p + iShuffleOffset);
71- shuffle[2 ].y () = item_ct1. get_sub_group (). shuffle ( state[2 ].y (), i + p + iShuffleOffset);
74+ shuffle[2 ].x () = sycl::select_from_group (g, state[2 ].x (), i + p + iShuffleOffset);
75+ shuffle[2 ].y () = sycl::select_from_group (g, state[2 ].y (), i + p + iShuffleOffset);
7276
73- shuffle[3 ].x () = item_ct1. get_sub_group (). shuffle ( state[3 ].x (), i + p + iShuffleOffset);
74- shuffle[3 ].y () = item_ct1. get_sub_group (). shuffle ( state[3 ].y (), i + p + iShuffleOffset);
77+ shuffle[3 ].x () = sycl::select_from_group (g, state[3 ].x (), i + p + iShuffleOffset);
78+ shuffle[3 ].y () = sycl::select_from_group (g, state[3 ].y (), i + p + iShuffleOffset);
7579
76- shuffle[4 ].x () = item_ct1. get_sub_group (). shuffle ( state[4 ].x (), i + p + iShuffleOffset);
77- shuffle[4 ].y () = item_ct1. get_sub_group (). shuffle ( state[4 ].y (), i + p + iShuffleOffset);
80+ shuffle[4 ].x () = sycl::select_from_group (g, state[4 ].x (), i + p + iShuffleOffset);
81+ shuffle[4 ].y () = sycl::select_from_group (g, state[4 ].y (), i + p + iShuffleOffset);
7882
79- shuffle[5 ].x () = item_ct1. get_sub_group (). shuffle ( state[5 ].x (), i + p + iShuffleOffset);
80- shuffle[5 ].y () = item_ct1. get_sub_group (). shuffle ( state[5 ].y (), i + p + iShuffleOffset);
83+ shuffle[5 ].x () = sycl::select_from_group (g, state[5 ].x (), i + p + iShuffleOffset);
84+ shuffle[5 ].y () = sycl::select_from_group (g, state[5 ].y (), i + p + iShuffleOffset);
8185
82- shuffle[6 ].x () = item_ct1. get_sub_group (). shuffle ( state[6 ].x (), i + p + iShuffleOffset);
83- shuffle[6 ].y () = item_ct1. get_sub_group (). shuffle ( state[6 ].y (), i + p + iShuffleOffset);
86+ shuffle[6 ].x () = sycl::select_from_group (g, state[6 ].x (), i + p + iShuffleOffset);
87+ shuffle[6 ].y () = sycl::select_from_group (g, state[6 ].y (), i + p + iShuffleOffset);
8488
85- shuffle[7 ].x () = item_ct1. get_sub_group (). shuffle ( state[7 ].x (), i + p + iShuffleOffset);
86- shuffle[7 ].y () = item_ct1. get_sub_group (). shuffle ( state[7 ].y (), i + p + iShuffleOffset);
89+ shuffle[7 ].x () = sycl::select_from_group (g, state[7 ].x (), i + p + iShuffleOffset);
90+ shuffle[7 ].y () = sycl::select_from_group (g, state[7 ].y (), i + p + iShuffleOffset);
8791
8892
8993 switch (mix_idx) {
@@ -101,7 +105,7 @@ DEV_INLINE bool compute_hash(uint64_t nonce, sycl::uint2 *mix_hash, sycl::nd_ite
101105 break ;
102106 }
103107
104- init0[p] = item_ct1.get_sub_group (). shuffle ( shuffle[0 ].x (), iShuffleOffset);
108+ init0[p] = sycl::select_from_group ( item_ct1.get_sub_group (), shuffle[0 ].x (), iShuffleOffset);
105109 }
106110
107111 for (uint32_t a = 0 ; a < ACCESSES; a += 4 )
@@ -115,7 +119,7 @@ DEV_INLINE bool compute_hash(uint64_t nonce, sycl::uint2 *mix_hash, sycl::nd_ite
115119 {
116120 offset[p] = fnv (init0[p] ^ (a + b), ((uint32_t *)&mix[p])[b]) % d_dag_size;
117121
118- offset[p] = item_ct1.get_sub_group (). shuffle ( offset[p], t + iShuffleOffset);
122+ offset[p] = sycl::select_from_group ( item_ct1.get_sub_group (), offset[p], t + iShuffleOffset);
119123 mix[p] = fnv4 (mix[p], d_dag[offset[p]].uint4s [thread_id]);
120124 }
121125 }
@@ -127,14 +131,14 @@ DEV_INLINE bool compute_hash(uint64_t nonce, sycl::uint2 *mix_hash, sycl::nd_ite
127131
128132 // update mix across threads
129133
130- shuffle[0 ].x () = item_ct1.get_sub_group (). shuffle ( thread_mix, 0 + iShuffleOffset);
131- shuffle[0 ].y () = item_ct1.get_sub_group (). shuffle ( thread_mix, 1 + iShuffleOffset);
132- shuffle[1 ].x () = item_ct1.get_sub_group (). shuffle ( thread_mix, 2 + iShuffleOffset);
133- shuffle[1 ].y () = item_ct1.get_sub_group (). shuffle ( thread_mix, 3 + iShuffleOffset);
134- shuffle[2 ].x () = item_ct1.get_sub_group (). shuffle ( thread_mix, 4 + iShuffleOffset);
135- shuffle[2 ].y () = item_ct1.get_sub_group (). shuffle ( thread_mix, 5 + iShuffleOffset);
136- shuffle[3 ].x () = item_ct1.get_sub_group (). shuffle ( thread_mix, 6 + iShuffleOffset);
137- shuffle[3 ].y () = item_ct1.get_sub_group (). shuffle ( thread_mix, 7 + iShuffleOffset);
134+ shuffle[0 ].x () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 0 + iShuffleOffset);
135+ shuffle[0 ].y () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 1 + iShuffleOffset);
136+ shuffle[1 ].x () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 2 + iShuffleOffset);
137+ shuffle[1 ].y () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 3 + iShuffleOffset);
138+ shuffle[2 ].x () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 4 + iShuffleOffset);
139+ shuffle[2 ].y () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 5 + iShuffleOffset);
140+ shuffle[3 ].x () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 6 + iShuffleOffset);
141+ shuffle[3 ].y () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 7 + iShuffleOffset);
138142
139143 if ((i + p) == thread_id) {
140144 // move mix into state:
@@ -157,29 +161,29 @@ DEV_INLINE bool compute_hash(uint64_t nonce, sycl::uint2 *mix_hash, sycl::nd_ite
157161 // ////sycl::uint2 shuffle[8];
158162 // ////for (int j = 0; j < 8; j++)
159163 // ////{
160- shuffle_0.x () = item_ct1.get_sub_group (). shuffle ( state[0 ].x (), i + p + iShuffleOffset);
161- shuffle_0.y () = item_ct1.get_sub_group (). shuffle ( state[0 ].y (), i + p + iShuffleOffset);
164+ shuffle_0.x () = sycl::select_from_group ( item_ct1.get_sub_group (), state[0 ].x (), i + p + iShuffleOffset);
165+ shuffle_0.y () = sycl::select_from_group ( item_ct1.get_sub_group (), state[0 ].y (), i + p + iShuffleOffset);
162166
163- shuffle_1.x () = item_ct1.get_sub_group (). shuffle ( state[1 ].x (), i + p + iShuffleOffset);
164- shuffle_1.y () = item_ct1.get_sub_group (). shuffle ( state[1 ].y (), i + p + iShuffleOffset);
167+ shuffle_1.x () = sycl::select_from_group ( item_ct1.get_sub_group (), state[1 ].x (), i + p + iShuffleOffset);
168+ shuffle_1.y () = sycl::select_from_group ( item_ct1.get_sub_group (), state[1 ].y (), i + p + iShuffleOffset);
165169
166- shuffle_2.x () = item_ct1.get_sub_group (). shuffle ( state[2 ].x (), i + p + iShuffleOffset);
167- shuffle_2.y () = item_ct1.get_sub_group (). shuffle ( state[2 ].y (), i + p + iShuffleOffset);
170+ shuffle_2.x () = sycl::select_from_group ( item_ct1.get_sub_group (), state[2 ].x (), i + p + iShuffleOffset);
171+ shuffle_2.y () = sycl::select_from_group ( item_ct1.get_sub_group (), state[2 ].y (), i + p + iShuffleOffset);
168172
169- shuffle_3.x () = item_ct1.get_sub_group (). shuffle ( state[3 ].x (), i + p + iShuffleOffset);
170- shuffle_3.y () = item_ct1.get_sub_group (). shuffle ( state[3 ].y (), i + p + iShuffleOffset);
173+ shuffle_3.x () = sycl::select_from_group ( item_ct1.get_sub_group (), state[3 ].x (), i + p + iShuffleOffset);
174+ shuffle_3.y () = sycl::select_from_group ( item_ct1.get_sub_group (), state[3 ].y (), i + p + iShuffleOffset);
171175
172- shuffle_4.x () = item_ct1.get_sub_group (). shuffle ( state[4 ].x (), i + p + iShuffleOffset);
173- shuffle_4.y () = item_ct1.get_sub_group (). shuffle ( state[4 ].y (), i + p + iShuffleOffset);
176+ shuffle_4.x () = sycl::select_from_group ( item_ct1.get_sub_group (), state[4 ].x (), i + p + iShuffleOffset);
177+ shuffle_4.y () = sycl::select_from_group ( item_ct1.get_sub_group (), state[4 ].y (), i + p + iShuffleOffset);
174178
175- shuffle_5.x () = item_ct1.get_sub_group (). shuffle ( state[5 ].x (), i + p + iShuffleOffset);
176- shuffle_5.y () = item_ct1.get_sub_group (). shuffle ( state[5 ].y (), i + p + iShuffleOffset);
179+ shuffle_5.x () = sycl::select_from_group ( item_ct1.get_sub_group (), state[5 ].x (), i + p + iShuffleOffset);
180+ shuffle_5.y () = sycl::select_from_group ( item_ct1.get_sub_group (), state[5 ].y (), i + p + iShuffleOffset);
177181
178- shuffle_6.x () = item_ct1.get_sub_group (). shuffle ( state[6 ].x (), i + p + iShuffleOffset);
179- shuffle_6.y () = item_ct1.get_sub_group (). shuffle ( state[6 ].y (), i + p + iShuffleOffset);
182+ shuffle_6.x () = sycl::select_from_group ( item_ct1.get_sub_group (), state[6 ].x (), i + p + iShuffleOffset);
183+ shuffle_6.y () = sycl::select_from_group ( item_ct1.get_sub_group (), state[6 ].y (), i + p + iShuffleOffset);
180184
181- shuffle_7.x () = item_ct1.get_sub_group (). shuffle ( state[7 ].x (), i + p + iShuffleOffset);
182- shuffle_7.y () = item_ct1.get_sub_group (). shuffle ( state[7 ].y (), i + p + iShuffleOffset);
185+ shuffle_7.x () = sycl::select_from_group ( item_ct1.get_sub_group (), state[7 ].x (), i + p + iShuffleOffset);
186+ shuffle_7.y () = sycl::select_from_group ( item_ct1.get_sub_group (), state[7 ].y (), i + p + iShuffleOffset);
183187
184188 // ///}
185189
@@ -198,7 +202,7 @@ DEV_INLINE bool compute_hash(uint64_t nonce, sycl::uint2 *mix_hash, sycl::nd_ite
198202 break ;
199203 }
200204
201- init0[p] = item_ct1.get_sub_group (). shuffle ( shuffle_0.x (), iShuffleOffset);
205+ init0[p] = sycl::select_from_group ( item_ct1.get_sub_group (), shuffle_0.x (), iShuffleOffset);
202206 }
203207
204208 // ////for (uint32_t a = 0; a < ACCESSES; a += 4)
@@ -552,14 +556,14 @@ DEV_INLINE bool compute_hash(uint64_t nonce, sycl::uint2 *mix_hash, sycl::nd_ite
552556
553557 // update mix across threads
554558
555- shuffle_0.x () = item_ct1.get_sub_group (). shuffle ( thread_mix, 0 + iShuffleOffset);
556- shuffle_0.y () = item_ct1.get_sub_group (). shuffle ( thread_mix, 1 + iShuffleOffset);
557- shuffle_1.x () = item_ct1.get_sub_group (). shuffle ( thread_mix, 2 + iShuffleOffset);
558- shuffle_1.y () = item_ct1.get_sub_group (). shuffle ( thread_mix, 3 + iShuffleOffset);
559- shuffle_2.x () = item_ct1.get_sub_group (). shuffle ( thread_mix, 4 + iShuffleOffset);
560- shuffle_2.y () = item_ct1.get_sub_group (). shuffle ( thread_mix, 5 + iShuffleOffset);
561- shuffle_3.x () = item_ct1.get_sub_group (). shuffle ( thread_mix, 6 + iShuffleOffset);
562- shuffle_3.y () = item_ct1.get_sub_group (). shuffle ( thread_mix, 7 + iShuffleOffset);
559+ shuffle_0.x () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 0 + iShuffleOffset);
560+ shuffle_0.y () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 1 + iShuffleOffset);
561+ shuffle_1.x () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 2 + iShuffleOffset);
562+ shuffle_1.y () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 3 + iShuffleOffset);
563+ shuffle_2.x () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 4 + iShuffleOffset);
564+ shuffle_2.y () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 5 + iShuffleOffset);
565+ shuffle_3.x () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 6 + iShuffleOffset);
566+ shuffle_3.y () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 7 + iShuffleOffset);
563567
564568 if ((i + p) == thread_id) {
565569 // move mix into state:
0 commit comments