Skip to content

Commit 782759b

Browse files
authored
DAG: Use poison when widening build_vector (#167631)
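Test changes are mostly noise (e.g. the widened padding lane of constant-pool vectors in the urem-seteq tests changes value). There are a few improvements (e.g. the AArch64 <7 x i32> constant rotate and funnel-shift tests no longer load shift amounts from the constant pool and use shl/usra instead) and a few regressions.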
1 parent 0c0c1a7 commit 782759b

39 files changed

+4391
-4470
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6057,11 +6057,11 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOOP_DEPENDENCE_MASK(SDNode *N) {
60576057

60586058
SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) {
60596059
SDLoc dl(N);
6060-
// Build a vector with undefined for the new nodes.
6060+
// Build a vector with poison for the new nodes.
60616061
EVT VT = N->getValueType(0);
60626062

60636063
// Integer BUILD_VECTOR operands may be larger than the node's vector element
6064-
// type. The UNDEFs need to have the same type as the existing operands.
6064+
// type. The POISONs need to have the same type as the existing operands.
60656065
EVT EltVT = N->getOperand(0).getValueType();
60666066
unsigned NumElts = VT.getVectorNumElements();
60676067

@@ -6070,7 +6070,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) {
60706070

60716071
SmallVector<SDValue, 16> NewOps(N->ops());
60726072
assert(WidenNumElts >= NumElts && "Shrinking vector instead of widening!");
6073-
NewOps.append(WidenNumElts - NumElts, DAG.getUNDEF(EltVT));
6073+
NewOps.append(WidenNumElts - NumElts, DAG.getPOISON(EltVT));
60746074

60756075
return DAG.getBuildVector(WidenVT, dl, NewOps);
60766076
}

llvm/test/CodeGen/AArch64/fsh.ll

Lines changed: 52 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -3537,27 +3537,22 @@ define <7 x i32> @rotl_v7i32_c(<7 x i32> %a) {
35373537
; CHECK-SD: // %bb.0: // %entry
35383538
; CHECK-SD-NEXT: fmov s0, w0
35393539
; CHECK-SD-NEXT: fmov s1, w4
3540-
; CHECK-SD-NEXT: adrp x8, .LCPI108_0
3541-
; CHECK-SD-NEXT: adrp x9, .LCPI108_1
3542-
; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI108_0]
3543-
; CHECK-SD-NEXT: ldr q3, [x9, :lo12:.LCPI108_1]
35443540
; CHECK-SD-NEXT: mov v0.s[1], w1
35453541
; CHECK-SD-NEXT: mov v1.s[1], w5
35463542
; CHECK-SD-NEXT: mov v0.s[2], w2
35473543
; CHECK-SD-NEXT: mov v1.s[2], w6
35483544
; CHECK-SD-NEXT: mov v0.s[3], w3
3549-
; CHECK-SD-NEXT: ushl v2.4s, v1.4s, v2.4s
3550-
; CHECK-SD-NEXT: ushl v1.4s, v1.4s, v3.4s
3551-
; CHECK-SD-NEXT: shl v4.4s, v0.4s, #3
3552-
; CHECK-SD-NEXT: usra v4.4s, v0.4s, #29
3553-
; CHECK-SD-NEXT: orr v0.16b, v1.16b, v2.16b
3554-
; CHECK-SD-NEXT: mov w1, v4.s[1]
3555-
; CHECK-SD-NEXT: mov w2, v4.s[2]
3556-
; CHECK-SD-NEXT: mov w3, v4.s[3]
3557-
; CHECK-SD-NEXT: mov w5, v0.s[1]
3558-
; CHECK-SD-NEXT: mov w6, v0.s[2]
3559-
; CHECK-SD-NEXT: fmov w0, s4
3560-
; CHECK-SD-NEXT: fmov w4, s0
3545+
; CHECK-SD-NEXT: shl v3.4s, v1.4s, #3
3546+
; CHECK-SD-NEXT: usra v3.4s, v1.4s, #29
3547+
; CHECK-SD-NEXT: shl v2.4s, v0.4s, #3
3548+
; CHECK-SD-NEXT: mov w5, v3.s[1]
3549+
; CHECK-SD-NEXT: mov w6, v3.s[2]
3550+
; CHECK-SD-NEXT: fmov w4, s3
3551+
; CHECK-SD-NEXT: usra v2.4s, v0.4s, #29
3552+
; CHECK-SD-NEXT: mov w1, v2.s[1]
3553+
; CHECK-SD-NEXT: mov w2, v2.s[2]
3554+
; CHECK-SD-NEXT: mov w3, v2.s[3]
3555+
; CHECK-SD-NEXT: fmov w0, s2
35613556
; CHECK-SD-NEXT: ret
35623557
;
35633558
; CHECK-GI-LABEL: rotl_v7i32_c:
@@ -3614,27 +3609,22 @@ define <7 x i32> @rotr_v7i32_c(<7 x i32> %a) {
36143609
; CHECK-SD: // %bb.0: // %entry
36153610
; CHECK-SD-NEXT: fmov s0, w0
36163611
; CHECK-SD-NEXT: fmov s1, w4
3617-
; CHECK-SD-NEXT: adrp x8, .LCPI109_0
3618-
; CHECK-SD-NEXT: adrp x9, .LCPI109_1
3619-
; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI109_0]
3620-
; CHECK-SD-NEXT: ldr q3, [x9, :lo12:.LCPI109_1]
36213612
; CHECK-SD-NEXT: mov v0.s[1], w1
36223613
; CHECK-SD-NEXT: mov v1.s[1], w5
36233614
; CHECK-SD-NEXT: mov v0.s[2], w2
36243615
; CHECK-SD-NEXT: mov v1.s[2], w6
36253616
; CHECK-SD-NEXT: mov v0.s[3], w3
3626-
; CHECK-SD-NEXT: ushl v2.4s, v1.4s, v2.4s
3627-
; CHECK-SD-NEXT: ushl v1.4s, v1.4s, v3.4s
3628-
; CHECK-SD-NEXT: shl v4.4s, v0.4s, #29
3629-
; CHECK-SD-NEXT: usra v4.4s, v0.4s, #3
3630-
; CHECK-SD-NEXT: orr v0.16b, v1.16b, v2.16b
3631-
; CHECK-SD-NEXT: mov w1, v4.s[1]
3632-
; CHECK-SD-NEXT: mov w2, v4.s[2]
3633-
; CHECK-SD-NEXT: mov w3, v4.s[3]
3634-
; CHECK-SD-NEXT: mov w5, v0.s[1]
3635-
; CHECK-SD-NEXT: mov w6, v0.s[2]
3636-
; CHECK-SD-NEXT: fmov w0, s4
3637-
; CHECK-SD-NEXT: fmov w4, s0
3617+
; CHECK-SD-NEXT: shl v3.4s, v1.4s, #29
3618+
; CHECK-SD-NEXT: usra v3.4s, v1.4s, #3
3619+
; CHECK-SD-NEXT: shl v2.4s, v0.4s, #29
3620+
; CHECK-SD-NEXT: mov w5, v3.s[1]
3621+
; CHECK-SD-NEXT: mov w6, v3.s[2]
3622+
; CHECK-SD-NEXT: fmov w4, s3
3623+
; CHECK-SD-NEXT: usra v2.4s, v0.4s, #3
3624+
; CHECK-SD-NEXT: mov w1, v2.s[1]
3625+
; CHECK-SD-NEXT: mov w2, v2.s[2]
3626+
; CHECK-SD-NEXT: mov w3, v2.s[3]
3627+
; CHECK-SD-NEXT: fmov w0, s2
36383628
; CHECK-SD-NEXT: ret
36393629
;
36403630
; CHECK-GI-LABEL: rotr_v7i32_c:
@@ -4132,36 +4122,31 @@ define <7 x i32> @fshl_v7i32_c(<7 x i32> %a, <7 x i32> %b) {
41324122
; CHECK-SD-LABEL: fshl_v7i32_c:
41334123
; CHECK-SD: // %bb.0: // %entry
41344124
; CHECK-SD-NEXT: fmov s0, w0
4135-
; CHECK-SD-NEXT: fmov s2, w4
4136-
; CHECK-SD-NEXT: ldr s1, [sp, #24]
4137-
; CHECK-SD-NEXT: fmov s3, w7
4125+
; CHECK-SD-NEXT: fmov s1, w4
41384126
; CHECK-SD-NEXT: mov x8, sp
4127+
; CHECK-SD-NEXT: fmov s2, w7
4128+
; CHECK-SD-NEXT: ldr s3, [sp, #24]
41394129
; CHECK-SD-NEXT: add x9, sp, #32
4140-
; CHECK-SD-NEXT: ld1 { v1.s }[1], [x9]
4141-
; CHECK-SD-NEXT: add x9, sp, #40
4142-
; CHECK-SD-NEXT: adrp x10, .LCPI134_1
41434130
; CHECK-SD-NEXT: mov v0.s[1], w1
4144-
; CHECK-SD-NEXT: mov v2.s[1], w5
4145-
; CHECK-SD-NEXT: ldr q5, [x10, :lo12:.LCPI134_1]
4146-
; CHECK-SD-NEXT: ld1 { v3.s }[1], [x8]
4131+
; CHECK-SD-NEXT: mov v1.s[1], w5
4132+
; CHECK-SD-NEXT: ld1 { v3.s }[1], [x9]
4133+
; CHECK-SD-NEXT: ld1 { v2.s }[1], [x8]
41474134
; CHECK-SD-NEXT: add x8, sp, #8
4148-
; CHECK-SD-NEXT: ld1 { v1.s }[2], [x9]
4149-
; CHECK-SD-NEXT: add x9, sp, #16
4135+
; CHECK-SD-NEXT: add x9, sp, #40
4136+
; CHECK-SD-NEXT: ld1 { v3.s }[2], [x9]
41504137
; CHECK-SD-NEXT: mov v0.s[2], w2
4151-
; CHECK-SD-NEXT: mov v2.s[2], w6
4152-
; CHECK-SD-NEXT: ld1 { v3.s }[2], [x8]
4153-
; CHECK-SD-NEXT: adrp x8, .LCPI134_0
4154-
; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI134_0]
4155-
; CHECK-SD-NEXT: ld1 { v3.s }[3], [x9]
4138+
; CHECK-SD-NEXT: mov v1.s[2], w6
4139+
; CHECK-SD-NEXT: ld1 { v2.s }[2], [x8]
4140+
; CHECK-SD-NEXT: add x8, sp, #16
4141+
; CHECK-SD-NEXT: ld1 { v2.s }[3], [x8]
41564142
; CHECK-SD-NEXT: mov v0.s[3], w3
4157-
; CHECK-SD-NEXT: ushl v1.4s, v1.4s, v4.4s
4158-
; CHECK-SD-NEXT: ushl v2.4s, v2.4s, v5.4s
4159-
; CHECK-SD-NEXT: orr v1.16b, v2.16b, v1.16b
4143+
; CHECK-SD-NEXT: shl v1.4s, v1.4s, #3
4144+
; CHECK-SD-NEXT: usra v1.4s, v3.4s, #29
41604145
; CHECK-SD-NEXT: shl v0.4s, v0.4s, #3
41614146
; CHECK-SD-NEXT: mov w5, v1.s[1]
41624147
; CHECK-SD-NEXT: mov w6, v1.s[2]
41634148
; CHECK-SD-NEXT: fmov w4, s1
4164-
; CHECK-SD-NEXT: usra v0.4s, v3.4s, #29
4149+
; CHECK-SD-NEXT: usra v0.4s, v2.4s, #29
41654150
; CHECK-SD-NEXT: mov w1, v0.s[1]
41664151
; CHECK-SD-NEXT: mov w2, v0.s[2]
41674152
; CHECK-SD-NEXT: mov w3, v0.s[3]
@@ -4225,36 +4210,31 @@ define <7 x i32> @fshr_v7i32_c(<7 x i32> %a, <7 x i32> %b) {
42254210
; CHECK-SD-LABEL: fshr_v7i32_c:
42264211
; CHECK-SD: // %bb.0: // %entry
42274212
; CHECK-SD-NEXT: fmov s0, w0
4228-
; CHECK-SD-NEXT: fmov s2, w4
4229-
; CHECK-SD-NEXT: ldr s1, [sp, #24]
4230-
; CHECK-SD-NEXT: fmov s3, w7
4213+
; CHECK-SD-NEXT: fmov s1, w4
42314214
; CHECK-SD-NEXT: mov x8, sp
4215+
; CHECK-SD-NEXT: fmov s2, w7
4216+
; CHECK-SD-NEXT: ldr s3, [sp, #24]
42324217
; CHECK-SD-NEXT: add x9, sp, #32
4233-
; CHECK-SD-NEXT: ld1 { v1.s }[1], [x9]
4234-
; CHECK-SD-NEXT: add x9, sp, #40
4235-
; CHECK-SD-NEXT: adrp x10, .LCPI135_1
42364218
; CHECK-SD-NEXT: mov v0.s[1], w1
4237-
; CHECK-SD-NEXT: mov v2.s[1], w5
4238-
; CHECK-SD-NEXT: ldr q5, [x10, :lo12:.LCPI135_1]
4239-
; CHECK-SD-NEXT: ld1 { v3.s }[1], [x8]
4219+
; CHECK-SD-NEXT: mov v1.s[1], w5
4220+
; CHECK-SD-NEXT: ld1 { v3.s }[1], [x9]
4221+
; CHECK-SD-NEXT: ld1 { v2.s }[1], [x8]
42404222
; CHECK-SD-NEXT: add x8, sp, #8
4241-
; CHECK-SD-NEXT: ld1 { v1.s }[2], [x9]
4242-
; CHECK-SD-NEXT: add x9, sp, #16
4223+
; CHECK-SD-NEXT: add x9, sp, #40
4224+
; CHECK-SD-NEXT: ld1 { v3.s }[2], [x9]
42434225
; CHECK-SD-NEXT: mov v0.s[2], w2
4244-
; CHECK-SD-NEXT: mov v2.s[2], w6
4245-
; CHECK-SD-NEXT: ld1 { v3.s }[2], [x8]
4246-
; CHECK-SD-NEXT: adrp x8, .LCPI135_0
4247-
; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI135_0]
4248-
; CHECK-SD-NEXT: ld1 { v3.s }[3], [x9]
4226+
; CHECK-SD-NEXT: mov v1.s[2], w6
4227+
; CHECK-SD-NEXT: ld1 { v2.s }[2], [x8]
4228+
; CHECK-SD-NEXT: add x8, sp, #16
4229+
; CHECK-SD-NEXT: ld1 { v2.s }[3], [x8]
42494230
; CHECK-SD-NEXT: mov v0.s[3], w3
4250-
; CHECK-SD-NEXT: ushl v1.4s, v1.4s, v4.4s
4251-
; CHECK-SD-NEXT: ushl v2.4s, v2.4s, v5.4s
4252-
; CHECK-SD-NEXT: orr v1.16b, v2.16b, v1.16b
4231+
; CHECK-SD-NEXT: shl v1.4s, v1.4s, #29
4232+
; CHECK-SD-NEXT: usra v1.4s, v3.4s, #3
42534233
; CHECK-SD-NEXT: shl v0.4s, v0.4s, #29
42544234
; CHECK-SD-NEXT: mov w5, v1.s[1]
42554235
; CHECK-SD-NEXT: mov w6, v1.s[2]
42564236
; CHECK-SD-NEXT: fmov w4, s1
4257-
; CHECK-SD-NEXT: usra v0.4s, v3.4s, #3
4237+
; CHECK-SD-NEXT: usra v0.4s, v2.4s, #3
42584238
; CHECK-SD-NEXT: mov w1, v0.s[1]
42594239
; CHECK-SD-NEXT: mov w2, v0.s[2]
42604240
; CHECK-SD-NEXT: mov w3, v0.s[3]

llvm/test/CodeGen/ARM/urem-seteq-illegal-types.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -450,7 +450,7 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
450450
; ARM7-NEXT: .short 9 @ 0x9
451451
; ARM7-NEXT: .short 10 @ 0xa
452452
; ARM7-NEXT: .short 10 @ 0xa
453-
; ARM7-NEXT: .short 10 @ 0xa
453+
; ARM7-NEXT: .short 0 @ 0x0
454454
; ARM7-NEXT: .LCPI4_4:
455455
; ARM7-NEXT: .short 341 @ 0x155
456456
; ARM7-NEXT: .short 292 @ 0x124
@@ -502,7 +502,7 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
502502
; ARM8-NEXT: .short 9 @ 0x9
503503
; ARM8-NEXT: .short 10 @ 0xa
504504
; ARM8-NEXT: .short 10 @ 0xa
505-
; ARM8-NEXT: .short 10 @ 0xa
505+
; ARM8-NEXT: .short 0 @ 0x0
506506
; ARM8-NEXT: .LCPI4_4:
507507
; ARM8-NEXT: .short 341 @ 0x155
508508
; ARM8-NEXT: .short 292 @ 0x124
@@ -554,7 +554,7 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
554554
; NEON7-NEXT: .short 9 @ 0x9
555555
; NEON7-NEXT: .short 10 @ 0xa
556556
; NEON7-NEXT: .short 10 @ 0xa
557-
; NEON7-NEXT: .short 10 @ 0xa
557+
; NEON7-NEXT: .short 0 @ 0x0
558558
; NEON7-NEXT: .LCPI4_4:
559559
; NEON7-NEXT: .short 341 @ 0x155
560560
; NEON7-NEXT: .short 292 @ 0x124
@@ -606,7 +606,7 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
606606
; NEON8-NEXT: .short 9 @ 0x9
607607
; NEON8-NEXT: .short 10 @ 0xa
608608
; NEON8-NEXT: .short 10 @ 0xa
609-
; NEON8-NEXT: .short 10 @ 0xa
609+
; NEON8-NEXT: .short 0 @ 0x0
610610
; NEON8-NEXT: .LCPI4_4:
611611
; NEON8-NEXT: .short 341 @ 0x155
612612
; NEON8-NEXT: .short 292 @ 0x124

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll

Lines changed: 9 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -8,25 +8,15 @@
88

99
; FIXME: This should be widened to a vlseg2 of <4 x i32> with VL set to 3
1010
define {<3 x i32>, <3 x i32>} @load_factor2_v3(ptr %ptr) {
11-
; RV32-LABEL: load_factor2_v3:
12-
; RV32: # %bb.0:
13-
; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
14-
; RV32-NEXT: vle32.v v10, (a0)
15-
; RV32-NEXT: li a0, 32
16-
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
17-
; RV32-NEXT: vnsrl.wi v8, v10, 0
18-
; RV32-NEXT: vnsrl.wx v9, v10, a0
19-
; RV32-NEXT: ret
20-
;
21-
; RV64-LABEL: load_factor2_v3:
22-
; RV64: # %bb.0:
23-
; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
24-
; RV64-NEXT: vle32.v v10, (a0)
25-
; RV64-NEXT: li a0, 32
26-
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
27-
; RV64-NEXT: vnsrl.wx v9, v10, a0
28-
; RV64-NEXT: vnsrl.wi v8, v10, 0
29-
; RV64-NEXT: ret
11+
; CHECK-LABEL: load_factor2_v3:
12+
; CHECK: # %bb.0:
13+
; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
14+
; CHECK-NEXT: vle32.v v10, (a0)
15+
; CHECK-NEXT: li a0, 32
16+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
17+
; CHECK-NEXT: vnsrl.wx v9, v10, a0
18+
; CHECK-NEXT: vnsrl.wi v8, v10, 0
19+
; CHECK-NEXT: ret
3020
%interleaved.vec = load <6 x i32>, ptr %ptr
3121
%v0 = shufflevector <6 x i32> %interleaved.vec, <6 x i32> poison, <3 x i32> <i32 0, i32 2, i32 4>
3222
%v1 = shufflevector <6 x i32> %interleaved.vec, <6 x i32> poison, <3 x i32> <i32 1, i32 3, i32 5>

llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -579,7 +579,7 @@ define void @test_urem_vec(ptr %X) nounwind {
579579
; RV32MV-NEXT: vmv.v.x v10, a3
580580
; RV32MV-NEXT: srli a3, a1, 22
581581
; RV32MV-NEXT: or a2, a3, a2
582-
; RV32MV-NEXT: lui a3, 41121
582+
; RV32MV-NEXT: lui a3, 161
583583
; RV32MV-NEXT: slli a1, a1, 10
584584
; RV32MV-NEXT: srli a1, a1, 21
585585
; RV32MV-NEXT: vslide1down.vx v10, v10, a1
@@ -636,7 +636,7 @@ define void @test_urem_vec(ptr %X) nounwind {
636636
; RV64MV-NEXT: lui a3, %hi(.LCPI4_0)
637637
; RV64MV-NEXT: addi a3, a3, %lo(.LCPI4_0)
638638
; RV64MV-NEXT: vle16.v v9, (a3)
639-
; RV64MV-NEXT: lui a3, 41121
639+
; RV64MV-NEXT: lui a3, 161
640640
; RV64MV-NEXT: slli a2, a2, 32
641641
; RV64MV-NEXT: or a1, a1, a2
642642
; RV64MV-NEXT: andi a2, a1, 2047

llvm/test/CodeGen/Thumb2/urem-seteq-illegal-types.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
117117
; CHECK-NEXT: .short 9 @ 0x9
118118
; CHECK-NEXT: .short 10 @ 0xa
119119
; CHECK-NEXT: .short 10 @ 0xa
120-
; CHECK-NEXT: .short 10 @ 0xa
120+
; CHECK-NEXT: .short 0 @ 0x0
121121
; CHECK-NEXT: .LCPI4_4:
122122
; CHECK-NEXT: .short 341 @ 0x155
123123
; CHECK-NEXT: .short 292 @ 0x124

llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1141,8 +1141,8 @@ define <2 x half> @fmul_pow_shl_cnt_vec_fail_to_large(<2 x i16> %cnt) nounwind {
11411141
; CHECK-AVX2-LABEL: fmul_pow_shl_cnt_vec_fail_to_large:
11421142
; CHECK-AVX2: # %bb.0:
11431143
; CHECK-AVX2-NEXT: subq $56, %rsp
1144+
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2]
11441145
; CHECK-AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1145-
; CHECK-AVX2-NEXT: vpmovsxbd {{.*#+}} ymm1 = [2,2,0,0,0,0,0,0]
11461146
; CHECK-AVX2-NEXT: vpsllvd %ymm0, %ymm1, %ymm0
11471147
; CHECK-AVX2-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
11481148
; CHECK-AVX2-NEXT: vpextrw $2, %xmm0, %eax
@@ -1171,8 +1171,8 @@ define <2 x half> @fmul_pow_shl_cnt_vec_fail_to_large(<2 x i16> %cnt) nounwind {
11711171
;
11721172
; CHECK-ONLY-AVX512F-LABEL: fmul_pow_shl_cnt_vec_fail_to_large:
11731173
; CHECK-ONLY-AVX512F: # %bb.0:
1174+
; CHECK-ONLY-AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2]
11741175
; CHECK-ONLY-AVX512F-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1175-
; CHECK-ONLY-AVX512F-NEXT: vpmovsxbd {{.*#+}} ymm1 = [2,2,0,0,0,0,0,0]
11761176
; CHECK-ONLY-AVX512F-NEXT: vpsllvd %ymm0, %ymm1, %ymm0
11771177
; CHECK-ONLY-AVX512F-NEXT: vpmovdw %zmm0, %ymm0
11781178
; CHECK-ONLY-AVX512F-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero

0 commit comments

Comments
 (0)