@@ -183,3 +183,110 @@ define <2 x i64> @test_vpmadd52l_mul_zero_scalar(<2 x i64> %x0, <2 x i64> %x1) {
183
183
%1 = call <2 x i64 > @llvm.x86.avx512.vpmadd52l.uq.128 (<2 x i64 > %x0 , <2 x i64 > <i64 0 , i64 123 >, <2 x i64 > %x1 )
184
184
ret <2 x i64 > %1
185
185
}
186
+
187
+ ; (1 << 51) * (1 << 1) = 1 << 52 -> the low 52 bits of the product are all zero, so only a bare return is expected below
188
+ define <2 x i64 > @test_vpmadd52l_mul_lo52_zero (<2 x i64 > %x0 ) {
189
+ ; CHECK-LABEL: test_vpmadd52l_mul_lo52_zero:
190
+ ; CHECK: # %bb.0:
191
+ ; CHECK-NEXT: retq
192
+ %1 = call <2 x i64 > @llvm.x86.avx512.vpmadd52l.uq.128 (<2 x i64 > %x0 , <2 x i64 > splat (i64 2251799813685248 ), <2 x i64 > splat (i64 2 )) ; 2251799813685248 = 1 << 51
193
+ ret <2 x i64 > %1
194
+ }
195
+
196
+ ; (1 << 25) * (1 << 26) = 1 << 51, which fits entirely below bit 52 -> the high 52 bits of the product are zero, so only a bare return is expected below
197
+ define <2 x i64 > @test_vpmadd52h_mul_hi52_zero (<2 x i64 > %x0 ) {
198
+ ; CHECK-LABEL: test_vpmadd52h_mul_hi52_zero:
199
+ ; CHECK: # %bb.0:
200
+ ; CHECK-NEXT: retq
201
+ %1 = call <2 x i64 > @llvm.x86.avx512.vpmadd52h.uq.128 (<2 x i64 > %x0 , <2 x i64 > splat (i64 33554432 ), <2 x i64 > splat (i64 67108864 )) ; 33554432 = 1 << 25, 67108864 = 1 << 26
202
+ ret <2 x i64 > %1
203
+ }
204
+
205
+ define <2 x i64 > @test_vpmadd52l_mul_lo52_const (<2 x i64 > %x0 ) {
206
+ ; AVX512-LABEL: test_vpmadd52l_mul_lo52_const:
207
+ ; AVX512: # %bb.0:
208
+ ; AVX512-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
209
+ ; AVX512-NEXT: retq
210
+ ;
211
+ ; AVX-LABEL: test_vpmadd52l_mul_lo52_const:
212
+ ; AVX: # %bb.0:
213
+ ; AVX-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
214
+ ; AVX-NEXT: retq
215
+ %1 = call <2 x i64 > @llvm.x86.avx512.vpmadd52l.uq.128 (<2 x i64 > %x0 , <2 x i64 > splat (i64 123 ), <2 x i64 > splat (i64 456 )) ; 123 * 456 = 56088 is well below 1 << 52; the checks above expect a single vpaddq of a constant-pool value onto %x0
216
+ ret <2 x i64 > %1
217
+ }
218
+
219
+ ; (1 << 51) * (1 << 51) = 1 << 102 -> the high 52 bits (product bits 52..103) are 1 << 50
220
+ define <2 x i64 > @test_vpmadd52h_mul_hi52_const (<2 x i64 > %x0 ) {
221
+ ; AVX512-LABEL: test_vpmadd52h_mul_hi52_const:
222
+ ; AVX512: # %bb.0:
223
+ ; AVX512-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
224
+ ; AVX512-NEXT: retq
225
+ ;
226
+ ; AVX-LABEL: test_vpmadd52h_mul_hi52_const:
227
+ ; AVX: # %bb.0:
228
+ ; AVX-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
229
+ ; AVX-NEXT: retq
230
+ %1 = call <2 x i64 > @llvm.x86.avx512.vpmadd52h.uq.128 (<2 x i64 > %x0 , <2 x i64 > splat (i64 2251799813685248 ), <2 x i64 > splat (i64 2251799813685248 )) ; both multiplicands are 1 << 51; the checks above expect a single vpaddq of a constant-pool value onto %x0
231
+ ret <2 x i64 > %1
232
+ }
233
+
234
+ define <2 x i64 > @test_vpmadd52l_mul_lo52_mask (<2 x i64 > %x0 , <2 x i64 > %x1 , <2 x i64 > %x2 ) {
235
+ ; CHECK-LABEL: test_vpmadd52l_mul_lo52_mask:
236
+ ; CHECK: # %bb.0:
237
+ ; CHECK-NEXT: retq
238
+ %and1 = and <2 x i64 > %x0 , splat (i64 1073741824 ) ; 1LL << 30
239
+ %and2 = and <2 x i64 > %x1 , splat (i64 1073741824 ) ; 1LL << 30
240
+ %1 = call <2 x i64 > @llvm.x86.avx512.vpmadd52l.uq.128 (<2 x i64 > %x0 , <2 x i64 > %and1 , <2 x i64 > %and2 ) ; each multiplicand is 0 or 1 << 30, so the product is 0 or 1 << 60 and its low 52 bits are always zero; only a bare return is expected
241
+ ret <2 x i64 > %1
242
+ }
243
+
244
+ define <2 x i64 > @test_vpmadd52h_mul_hi52_mask (<2 x i64 > %x0 , <2 x i64 > %x1 , <2 x i64 > %x2 ) {
245
+ ; CHECK-LABEL: test_vpmadd52h_mul_hi52_mask:
246
+ ; CHECK: # %bb.0:
247
+ ; CHECK-NEXT: retq
248
+ %and1 = lshr <2 x i64 > %x0 , splat (i64 40 ) ; logical shift right by 40 leaves at most 24 significant bits
249
+ %and2 = lshr <2 x i64 > %x1 , splat (i64 40 ) ; logical shift right by 40 leaves at most 24 significant bits
250
+ %1 = call <2 x i64 > @llvm.x86.avx512.vpmadd52h.uq.128 (<2 x i64 > %x0 , <2 x i64 > %and1 , <2 x i64 > %and2 ) ; product is below 1 << 48, so its high 52 bits are always zero; only a bare return is expected
251
+ ret <2 x i64 > %1
252
+ }
253
+
254
+ define <2 x i64 > @test_vpmadd52l_mul_lo52_mask_negative (<2 x i64 > %x0 , <2 x i64 > %x1 , <2 x i64 > %x2 ) {
255
+ ; AVX512-LABEL: test_vpmadd52l_mul_lo52_mask_negative:
256
+ ; AVX512: # %bb.0:
257
+ ; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm2
258
+ ; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
259
+ ; AVX512-NEXT: vpmadd52luq %xmm1, %xmm2, %xmm0
260
+ ; AVX512-NEXT: retq
261
+ ;
262
+ ; AVX-LABEL: test_vpmadd52l_mul_lo52_mask_negative:
263
+ ; AVX: # %bb.0:
264
+ ; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
265
+ ; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
266
+ ; AVX-NEXT: {vex} vpmadd52luq %xmm1, %xmm2, %xmm0
267
+ ; AVX-NEXT: retq
268
+ %and1 = and <2 x i64 > %x0 , splat (i64 2097152 ) ; 1LL << 21
269
+ %and2 = and <2 x i64 > %x1 , splat (i64 1073741824 ) ; 1LL << 30
270
+ %1 = call <2 x i64 > @llvm.x86.avx512.vpmadd52l.uq.128 (<2 x i64 > %x0 , <2 x i64 > %and1 , <2 x i64 > %and2 ) ; product is 0 or 1 << 51, which lies inside the low 52 bits, so the checks above expect the multiply-add to remain
271
+ ret <2 x i64 > %1
272
+ }
273
+
274
+ define <2 x i64 > @test_vpmadd52h_mul_hi52_negative (<2 x i64 > %x0 , <2 x i64 > %x1 , <2 x i64 > %x2 ) {
275
+ ; AVX512-LABEL: test_vpmadd52h_mul_hi52_negative:
276
+ ; AVX512: # %bb.0:
277
+ ; AVX512-NEXT: vpsrlq $30, %xmm0, %xmm2
278
+ ; AVX512-NEXT: vpsrlq $43, %xmm1, %xmm1
279
+ ; AVX512-NEXT: vpmadd52huq %xmm1, %xmm2, %xmm0
280
+ ; AVX512-NEXT: retq
281
+ ;
282
+ ; AVX-LABEL: test_vpmadd52h_mul_hi52_negative:
283
+ ; AVX: # %bb.0:
284
+ ; AVX-NEXT: vpsrlq $30, %xmm0, %xmm2
285
+ ; AVX-NEXT: vpsrlq $43, %xmm1, %xmm1
286
+ ; AVX-NEXT: {vex} vpmadd52huq %xmm1, %xmm2, %xmm0
287
+ ; AVX-NEXT: retq
288
+ %and1 = lshr <2 x i64 > %x0 , splat (i64 30 ) ; logical shift right by 30 leaves at most 34 significant bits
289
+ %and2 = lshr <2 x i64 > %x1 , splat (i64 43 ) ; logical shift right by 43 leaves at most 21 significant bits
290
+ %1 = call <2 x i64 > @llvm.x86.avx512.vpmadd52h.uq.128 (<2 x i64 > %x0 , <2 x i64 > %and1 , <2 x i64 > %and2 ) ; product can need up to 55 bits (> 52), so its high part may be non-zero and the checks above expect the multiply-add to remain
291
+ ret <2 x i64 > %1
292
+ }
0 commit comments