@@ -253,23 +253,44 @@ define <1 x i64> @test_scalar_no_ifma(<1 x i64> %x, <1 x i64> %y, <1 x i64> %z)
253253 ret <1 x i64 > %res
254254}
255255
256+ ; %x is masked to 40 bits and %y to 13 bits, so the product can need up to 53 bits: too wide (presumably for the 52-bit IFMA multiply-add; confirm against the RUN lines).
256257define <8 x i64 > @test_mixed_width_too_wide (<8 x i64 > %x , <8 x i64 > %y , <8 x i64 > %z ) {
257- ; 40-bit and 13-bit, too wide
258258; AVX-LABEL: test_mixed_width_too_wide:
259259; AVX: # %bb.0:
260- ; AVX-NEXT: vmovaps %ymm5, %ymm1
261- ; AVX-NEXT: vmovaps %ymm4, %ymm0
260+ ; AVX-NEXT: vpbroadcastq {{.*#+}} ymm6 = [8191,8191,8191,8191]
261+ ; AVX-NEXT: vpand %ymm6, %ymm2, %ymm2
262+ ; AVX-NEXT: vpand %ymm6, %ymm3, %ymm3
263+ ; AVX-NEXT: vpmovzxdq {{.*#+}} ymm6 = [2155905028,2155905036,2155905044,2155905052]
264+ ; AVX-NEXT: vpshufb %ymm6, %ymm1, %ymm7
265+ ; AVX-NEXT: vpmuludq %ymm3, %ymm7, %ymm7
266+ ; AVX-NEXT: vpsllq $32, %ymm7, %ymm7
267+ ; AVX-NEXT: vpmuludq %ymm3, %ymm1, %ymm1
268+ ; AVX-NEXT: vpshufb %ymm6, %ymm0, %ymm3
269+ ; AVX-NEXT: vpmuludq %ymm2, %ymm3, %ymm3
270+ ; AVX-NEXT: vpsllq $32, %ymm3, %ymm3
271+ ; AVX-NEXT: vpmuludq %ymm2, %ymm0, %ymm0
272+ ; AVX-NEXT: vpaddq %ymm0, %ymm4, %ymm0
273+ ; AVX-NEXT: vpaddq %ymm3, %ymm0, %ymm0
274+ ; AVX-NEXT: vpaddq %ymm1, %ymm5, %ymm1
275+ ; AVX-NEXT: vpaddq %ymm7, %ymm1, %ymm1
262276; AVX-NEXT: retq
263277;
264278; AVX512-LABEL: test_mixed_width_too_wide:
265279; AVX512: # %bb.0:
266- ; AVX512-NEXT: vmovaps %zmm2, %zmm0
280+ ; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm1
281+ ; AVX512-NEXT: vpmuludq %zmm1, %zmm0, %zmm3
282+ ; AVX512-NEXT: vpsrlq $32, %zmm0, %zmm0
283+ ; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
284+ ; AVX512-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
285+ ; AVX512-NEXT: vpsllq $32, %zmm0, %zmm0
286+ ; AVX512-NEXT: vpaddq %zmm3, %zmm2, %zmm1
287+ ; AVX512-NEXT: vpaddq %zmm0, %zmm1, %zmm0
267288; AVX512-NEXT: retq
268289 %x40 = and <8 x i64 > %x , splat (i64 1099511627775 )
269290 %y13 = and <8 x i64 > %y , splat (i64 8191 )
270291 %mul = mul <8 x i64 > %x40 , %y13
271292 %res = add <8 x i64 > %z , %mul
272- ret <8 x i64 > %z
293+ ret <8 x i64 > %res
273294}
274295
275296define <8 x i64 > @test_zext32_inputs_not_safe (<8 x i32 > %xi32 , <8 x i32 > %yi32 , <8 x i64 > %z ) {
@@ -352,8 +373,8 @@ define <16 x i64> @test_1024_combine_split(<16 x i64> %x, <16 x i64> %y, <16 x i
352373 ret <16 x i64 > %res
353374}
354375
355- define <1 x i64 > @test_not_i1 (<1 x i64 > %x , <1 x i64 > %y , <1 x i64 > %z ) {
356- ; X64-LABEL: test_not_i1 :
376+ define <1 x i64 > @test_not_v1i64 (<1 x i64 > %x , <1 x i64 > %y , <1 x i64 > %z ) {
377+ ; X64-LABEL: test_not_v1i64 :
357378; X64: # %bb.0:
358379; X64-NEXT: andl $67108863, %edi # imm = 0x3FFFFFF
359380; X64-NEXT: imulq %rdi, %rdi
@@ -366,24 +387,24 @@ define <1 x i64> @test_not_i1(<1 x i64> %x, <1 x i64> %y, <1 x i64> %z) {
366387 ret <1 x i64 > %res
367388}
368389
369- define <3 x i64 > @test_i3 (<3 x i64 > %x , <3 x i64 > %y , <3 x i64 > %z ) {
370- ; AVX-LABEL: test_i3 :
390+ define <3 x i64 > @test_v3i64 (<3 x i64 > %x , <3 x i64 > %y , <3 x i64 > %z ) {
391+ ; AVX-LABEL: test_v3i64 :
371392; AVX: # %bb.0:
372393; AVX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [67108863,67108863,67108863,67108863]
373394; AVX-NEXT: vpand %ymm1, %ymm0, %ymm0
374395; AVX-NEXT: vpmuludq %ymm0, %ymm0, %ymm0
375396; AVX-NEXT: vpaddq %ymm2, %ymm0, %ymm0
376397; AVX-NEXT: retq
377398;
378- ; AVX512-NOVL-LABEL: test_i3 :
399+ ; AVX512-NOVL-LABEL: test_v3i64 :
379400; AVX512-NOVL: # %bb.0:
380401; AVX512-NOVL-NEXT: vpbroadcastq {{.*#+}} ymm1 = [67108863,67108863,67108863,67108863]
381402; AVX512-NOVL-NEXT: vpand %ymm1, %ymm0, %ymm0
382403; AVX512-NOVL-NEXT: vpmuludq %ymm0, %ymm0, %ymm0
383404; AVX512-NOVL-NEXT: vpaddq %ymm2, %ymm0, %ymm0
384405; AVX512-NOVL-NEXT: retq
385406;
386- ; AVX512VL-LABEL: test_i3 :
407+ ; AVX512VL-LABEL: test_v3i64 :
387408; AVX512VL: # %bb.0:
388409; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
389410; AVX512VL-NEXT: vpmuludq %ymm0, %ymm0, %ymm0
@@ -396,8 +417,8 @@ define <3 x i64> @test_i3(<3 x i64> %x, <3 x i64> %y, <3 x i64> %z) {
396417 ret <3 x i64 > %res
397418}
398419
399- define <5 x i64 > @test_i5 (<5 x i64 > %x , <5 x i64 > %y , <5 x i64 > %z ) {
400- ; AVX-LABEL: test_i5 :
420+ define <5 x i64 > @test_v5i64 (<5 x i64 > %x , <5 x i64 > %y , <5 x i64 > %z ) {
421+ ; AVX-LABEL: test_v5i64 :
401422; AVX: # %bb.0:
402423; AVX-NEXT: movq %rdi, %rax
403424; AVX-NEXT: vmovq %r8, %xmm0
@@ -427,7 +448,7 @@ define <5 x i64> @test_i5(<5 x i64> %x, <5 x i64> %y, <5 x i64> %z) {
427448; AVX-NEXT: vzeroupper
428449; AVX-NEXT: retq
429450;
430- ; AVX512-LABEL: test_i5 :
451+ ; AVX512-LABEL: test_v5i64 :
431452; AVX512: # %bb.0:
432453; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
433454; AVX512-NEXT: vpmuludq %zmm0, %zmm0, %zmm0
@@ -440,8 +461,8 @@ define <5 x i64> @test_i5(<5 x i64> %x, <5 x i64> %y, <5 x i64> %z) {
440461 ret <5 x i64 > %res
441462}
442463
443- define <6 x i64 > @test_i6 (<6 x i64 > %x , <6 x i64 > %y , <6 x i64 > %z ) {
444- ; AVX-LABEL: test_i6 :
464+ define <6 x i64 > @test_v6i64 (<6 x i64 > %x , <6 x i64 > %y , <6 x i64 > %z ) {
465+ ; AVX-LABEL: test_v6i64 :
445466; AVX: # %bb.0:
446467; AVX-NEXT: movq %rdi, %rax
447468; AVX-NEXT: vmovq %r8, %xmm0
@@ -466,7 +487,7 @@ define <6 x i64> @test_i6(<6 x i64> %x, <6 x i64> %y, <6 x i64> %z) {
466487; AVX-NEXT: vzeroupper
467488; AVX-NEXT: retq
468489;
469- ; AVX512-LABEL: test_i6 :
490+ ; AVX512-LABEL: test_v6i64 :
470491; AVX512: # %bb.0:
471492; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
472493; AVX512-NEXT: vpmuludq %zmm0, %zmm0, %zmm0
@@ -479,8 +500,8 @@ define <6 x i64> @test_i6(<6 x i64> %x, <6 x i64> %y, <6 x i64> %z) {
479500 ret <6 x i64 > %res
480501}
481502
482- define <9 x i64 > @test_i9 (<9 x i64 > %x , <9 x i64 > %y , <9 x i64 > %z ) {
483- ; AVX-LABEL: test_i9 :
503+ define <9 x i64 > @test_v9i64 (<9 x i64 > %x , <9 x i64 > %y , <9 x i64 > %z ) {
504+ ; AVX-LABEL: test_v9i64 :
484505; AVX: # %bb.0:
485506; AVX-NEXT: movq %rdi, %rax
486507; AVX-NEXT: vmovq %r8, %xmm0
@@ -518,7 +539,7 @@ define <9 x i64> @test_i9(<9 x i64> %x, <9 x i64> %y, <9 x i64> %z) {
518539; AVX-NEXT: vzeroupper
519540; AVX-NEXT: retq
520541;
521- ; AVX512-LABEL: test_i9 :
542+ ; AVX512-LABEL: test_v9i64 :
522543; AVX512: # %bb.0:
523544; AVX512-NEXT: movq %rdi, %rax
524545; AVX512-NEXT: vmovq %r8, %xmm0
0 commit comments