@@ -32,6 +32,7 @@ template <>
32
32
struct zmm_vector <int32_t > {
33
33
using type_t = int32_t ;
34
34
using reg_t = __m512i;
35
+ using regi_t = __m512i;
35
36
using halfreg_t = __m256i;
36
37
using opmask_t = __mmask16;
37
38
static const uint8_t numlanes = 16 ;
@@ -65,6 +66,10 @@ struct zmm_vector<int32_t> {
65
66
{
66
67
return _mm512_cmp_epi32_mask (x, y, _MM_CMPINT_NLT);
67
68
}
69
+ static opmask_t eq (reg_t x, reg_t y)
70
+ {
71
+ return _mm512_cmpeq_epi32_mask (x, y);
72
+ }
68
73
static opmask_t get_partial_loadmask (uint64_t num_to_read)
69
74
{
70
75
return ((0x1ull << num_to_read) - 0x1ull );
@@ -123,6 +128,40 @@ struct zmm_vector<int32_t> {
123
128
{
124
129
return _mm512_set1_epi32 (v);
125
130
}
131
+ static regi_t seti (int v1,
132
+ int v2,
133
+ int v3,
134
+ int v4,
135
+ int v5,
136
+ int v6,
137
+ int v7,
138
+ int v8,
139
+ int v9,
140
+ int v10,
141
+ int v11,
142
+ int v12,
143
+ int v13,
144
+ int v14,
145
+ int v15,
146
+ int v16)
147
+ {
148
+ return _mm512_set_epi32 (v1,
149
+ v2,
150
+ v3,
151
+ v4,
152
+ v5,
153
+ v6,
154
+ v7,
155
+ v8,
156
+ v9,
157
+ v10,
158
+ v11,
159
+ v12,
160
+ v13,
161
+ v14,
162
+ v15,
163
+ v16);
164
+ }
126
165
template <uint8_t mask>
127
166
static reg_t shuffle (reg_t zmm)
128
167
{
@@ -171,6 +210,7 @@ template <>
171
210
struct zmm_vector <uint32_t > {
172
211
using type_t = uint32_t ;
173
212
using reg_t = __m512i;
213
+ using regi_t = __m512i;
174
214
using halfreg_t = __m256i;
175
215
using opmask_t = __mmask16;
176
216
static const uint8_t numlanes = 16 ;
@@ -214,6 +254,10 @@ struct zmm_vector<uint32_t> {
214
254
{
215
255
return _mm512_cmp_epu32_mask (x, y, _MM_CMPINT_NLT);
216
256
}
257
+ static opmask_t eq (reg_t x, reg_t y)
258
+ {
259
+ return _mm512_cmpeq_epu32_mask (x, y);
260
+ }
217
261
static opmask_t get_partial_loadmask (uint64_t num_to_read)
218
262
{
219
263
return ((0x1ull << num_to_read) - 0x1ull );
@@ -262,6 +306,40 @@ struct zmm_vector<uint32_t> {
262
306
{
263
307
return _mm512_set1_epi32 (v);
264
308
}
309
+ static regi_t seti (int v1,
310
+ int v2,
311
+ int v3,
312
+ int v4,
313
+ int v5,
314
+ int v6,
315
+ int v7,
316
+ int v8,
317
+ int v9,
318
+ int v10,
319
+ int v11,
320
+ int v12,
321
+ int v13,
322
+ int v14,
323
+ int v15,
324
+ int v16)
325
+ {
326
+ return _mm512_set_epi32 (v1,
327
+ v2,
328
+ v3,
329
+ v4,
330
+ v5,
331
+ v6,
332
+ v7,
333
+ v8,
334
+ v9,
335
+ v10,
336
+ v11,
337
+ v12,
338
+ v13,
339
+ v14,
340
+ v15,
341
+ v16);
342
+ }
265
343
template <uint8_t mask>
266
344
static reg_t shuffle (reg_t zmm)
267
345
{
@@ -310,6 +388,7 @@ template <>
310
388
struct zmm_vector <float > {
311
389
using type_t = float ;
312
390
using reg_t = __m512;
391
+ using regi_t = __m512i;
313
392
using halfreg_t = __m256;
314
393
using opmask_t = __mmask16;
315
394
static const uint8_t numlanes = 16 ;
@@ -343,6 +422,10 @@ struct zmm_vector<float> {
343
422
{
344
423
return _mm512_cmp_ps_mask (x, y, _CMP_GE_OQ);
345
424
}
425
+ static opmask_t eq (reg_t x, reg_t y)
426
+ {
427
+ return _mm512_cmpeq_ps_mask (x, y);
428
+ }
346
429
static opmask_t get_partial_loadmask (uint64_t num_to_read)
347
430
{
348
431
return ((0x1ull << num_to_read) - 0x1ull );
@@ -415,6 +498,40 @@ struct zmm_vector<float> {
415
498
{
416
499
return _mm512_set1_ps (v);
417
500
}
501
+ static regi_t seti (int v1,
502
+ int v2,
503
+ int v3,
504
+ int v4,
505
+ int v5,
506
+ int v6,
507
+ int v7,
508
+ int v8,
509
+ int v9,
510
+ int v10,
511
+ int v11,
512
+ int v12,
513
+ int v13,
514
+ int v14,
515
+ int v15,
516
+ int v16)
517
+ {
518
+ return _mm512_set_epi32 (v1,
519
+ v2,
520
+ v3,
521
+ v4,
522
+ v5,
523
+ v6,
524
+ v7,
525
+ v8,
526
+ v9,
527
+ v10,
528
+ v11,
529
+ v12,
530
+ v13,
531
+ v14,
532
+ v15,
533
+ v16);
534
+ }
418
535
template <uint8_t mask>
419
536
static reg_t shuffle (reg_t zmm)
420
537
{
0 commit comments