Skip to content

Commit 38bfe9a

Browse files
woruyu and RKSimon authored
[DAG] combineVSelectWithAllOnesOrZeros - missing freeze (llvm#150388)
This PR resolves llvm#150069.

---------

Co-authored-by: Simon Pilgrim <[email protected]>
1 parent 32efbb7 commit 38bfe9a

File tree

10 files changed: 383 additions (+383) and 367 deletions (-367).

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13186,22 +13186,22 @@ static SDValue combineVSelectWithAllOnesOrZeros(SDValue Cond, SDValue TVal,
1318613186

1318713187
// select Cond, -1, x → or Cond, x
1318813188
if (IsTAllOne) {
13189-
SDValue X = DAG.getBitcast(CondVT, FVal);
13189+
SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(FVal));
1319013190
SDValue Or = DAG.getNode(ISD::OR, DL, CondVT, Cond, X);
1319113191
return DAG.getBitcast(VT, Or);
1319213192
}
1319313193

1319413194
// select Cond, x, 0 → and Cond, x
1319513195
if (IsFAllZero) {
13196-
SDValue X = DAG.getBitcast(CondVT, TVal);
13196+
SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(TVal));
1319713197
SDValue And = DAG.getNode(ISD::AND, DL, CondVT, Cond, X);
1319813198
return DAG.getBitcast(VT, And);
1319913199
}
1320013200

1320113201
// select Cond, 0, x -> and not(Cond), x
1320213202
if (IsTAllZero &&
1320313203
(isBitwiseNot(peekThroughBitcasts(Cond)) || TLI.hasAndNot(Cond))) {
13204-
SDValue X = DAG.getBitcast(CondVT, FVal);
13204+
SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(FVal));
1320513205
SDValue And =
1320613206
DAG.getNode(ISD::AND, DL, CondVT, DAG.getNOT(DL, Cond, CondVT), X);
1320713207
return DAG.getBitcast(VT, And);

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9132,11 +9132,17 @@ LowerBUILD_VECTORAsVariablePermute(SDValue V, const SDLoc &DL,
91329132
SelectionDAG &DAG,
91339133
const X86Subtarget &Subtarget) {
91349134
SDValue SrcVec, IndicesVec;
9135+
9136+
auto PeekThroughFreeze = [](SDValue N) {
9137+
if (N->getOpcode() == ISD::FREEZE && N.hasOneUse())
9138+
return N->getOperand(0);
9139+
return N;
9140+
};
91359141
// Check for a match of the permute source vector and permute index elements.
91369142
// This is done by checking that the i-th build_vector operand is of the form:
91379143
// (extract_elt SrcVec, (extract_elt IndicesVec, i)).
91389144
for (unsigned Idx = 0, E = V.getNumOperands(); Idx != E; ++Idx) {
9139-
SDValue Op = V.getOperand(Idx);
9145+
SDValue Op = PeekThroughFreeze(V.getOperand(Idx));
91409146
if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
91419147
return SDValue();
91429148

llvm/test/CodeGen/AArch64/vselect-ext.ll

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -175,12 +175,12 @@ define <8 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v8i32_from_v8i1
175175
; CHECK: ; %bb.0:
176176
; CHECK-NEXT: bic.8h v0, #128, lsl #8
177177
; CHECK-NEXT: movi.4s v1, #10
178-
; CHECK-NEXT: ushll2.4s v2, v0, #0
179-
; CHECK-NEXT: ushll.4s v0, v0, #0
180-
; CHECK-NEXT: cmhi.4s v3, v0, v1
181-
; CHECK-NEXT: cmhi.4s v1, v2, v1
182-
; CHECK-NEXT: and.16b v1, v1, v2
183-
; CHECK-NEXT: and.16b v0, v3, v0
178+
; CHECK-NEXT: ushll.4s v2, v0, #0
179+
; CHECK-NEXT: ushll2.4s v0, v0, #0
180+
; CHECK-NEXT: cmhi.4s v3, v2, v1
181+
; CHECK-NEXT: cmhi.4s v1, v0, v1
182+
; CHECK-NEXT: and.16b v1, v1, v0
183+
; CHECK-NEXT: and.16b v0, v3, v2
184184
; CHECK-NEXT: ret
185185
%ext = zext <8 x i15> %a to <8 x i32>
186186
%cmp = icmp ugt <8 x i15> %a, <i15 10, i15 10, i15 10, i15 10, i15 10, i15 10, i15 10, i15 10>
@@ -289,12 +289,12 @@ define <8 x i32> @same_zext_used_in_cmp_eq_and_select_v8i32_from_v8i13(<8 x i13>
289289
; CHECK: ; %bb.0:
290290
; CHECK-NEXT: bic.8h v0, #224, lsl #8
291291
; CHECK-NEXT: movi.4s v1, #10
292-
; CHECK-NEXT: ushll2.4s v2, v0, #0
293-
; CHECK-NEXT: ushll.4s v0, v0, #0
294-
; CHECK-NEXT: cmeq.4s v3, v0, v1
295-
; CHECK-NEXT: cmeq.4s v1, v2, v1
296-
; CHECK-NEXT: and.16b v1, v1, v2
297-
; CHECK-NEXT: and.16b v0, v3, v0
292+
; CHECK-NEXT: ushll.4s v2, v0, #0
293+
; CHECK-NEXT: ushll2.4s v0, v0, #0
294+
; CHECK-NEXT: cmeq.4s v3, v2, v1
295+
; CHECK-NEXT: cmeq.4s v1, v0, v1
296+
; CHECK-NEXT: and.16b v1, v1, v0
297+
; CHECK-NEXT: and.16b v0, v3, v2
298298
; CHECK-NEXT: ret
299299
%ext = zext <8 x i13> %a to <8 x i32>
300300
%cmp = icmp eq <8 x i13> %a, <i13 10, i13 10, i13 10, i13 10, i13 10, i13 10, i13 10, i13 10>
@@ -429,17 +429,17 @@ define <8 x i32> @same_sext_used_in_cmp_eq_and_select_v8i32(<8 x i16> %a) {
429429
define <8 x i32> @same_sext_used_in_cmp_eq_and_select_v8i32_from_v8i13(<8 x i13> %a) {
430430
; CHECK-LABEL: same_sext_used_in_cmp_eq_and_select_v8i32_from_v8i13:
431431
; CHECK: ; %bb.0:
432-
; CHECK-NEXT: ushll.4s v2, v0, #0
433-
; CHECK-NEXT: ushll2.4s v0, v0, #0
432+
; CHECK-NEXT: ushll2.4s v2, v0, #0
433+
; CHECK-NEXT: ushll.4s v0, v0, #0
434434
; CHECK-NEXT: movi.4s v1, #10
435435
; CHECK-NEXT: shl.4s v0, v0, #19
436436
; CHECK-NEXT: shl.4s v2, v2, #19
437437
; CHECK-NEXT: sshr.4s v0, v0, #19
438438
; CHECK-NEXT: sshr.4s v2, v2, #19
439-
; CHECK-NEXT: cmeq.4s v3, v2, v1
440-
; CHECK-NEXT: cmeq.4s v1, v0, v1
441-
; CHECK-NEXT: and.16b v1, v1, v0
442-
; CHECK-NEXT: and.16b v0, v3, v2
439+
; CHECK-NEXT: cmeq.4s v3, v0, v1
440+
; CHECK-NEXT: cmeq.4s v1, v2, v1
441+
; CHECK-NEXT: and.16b v1, v1, v2
442+
; CHECK-NEXT: and.16b v0, v3, v0
443443
; CHECK-NEXT: ret
444444
%ext = sext <8 x i13> %a to <8 x i32>
445445
%cmp = icmp eq <8 x i13> %a, <i13 10, i13 10, i13 10, i13 10, i13 10, i13 10, i13 10, i13 10>
@@ -493,17 +493,17 @@ entry:
493493
define <8 x i32> @same_sext_used_in_cmp_unsigned_pred_and_select_v8i32_from_v8i15(<8 x i15> %a) {
494494
; CHECK-LABEL: same_sext_used_in_cmp_unsigned_pred_and_select_v8i32_from_v8i15:
495495
; CHECK: ; %bb.0:
496-
; CHECK-NEXT: ushll.4s v2, v0, #0
497-
; CHECK-NEXT: ushll2.4s v0, v0, #0
496+
; CHECK-NEXT: ushll2.4s v2, v0, #0
497+
; CHECK-NEXT: ushll.4s v0, v0, #0
498498
; CHECK-NEXT: movi.4s v1, #10
499499
; CHECK-NEXT: shl.4s v0, v0, #17
500500
; CHECK-NEXT: shl.4s v2, v2, #17
501501
; CHECK-NEXT: sshr.4s v0, v0, #17
502502
; CHECK-NEXT: sshr.4s v2, v2, #17
503-
; CHECK-NEXT: cmge.4s v3, v2, v1
504-
; CHECK-NEXT: cmge.4s v1, v0, v1
505-
; CHECK-NEXT: and.16b v1, v1, v0
506-
; CHECK-NEXT: and.16b v0, v3, v2
503+
; CHECK-NEXT: cmge.4s v3, v0, v1
504+
; CHECK-NEXT: cmge.4s v1, v2, v1
505+
; CHECK-NEXT: and.16b v1, v1, v2
506+
; CHECK-NEXT: and.16b v0, v3, v0
507507
; CHECK-NEXT: ret
508508
%ext = sext <8 x i15> %a to <8 x i32>
509509
%cmp = icmp sge <8 x i15> %a, <i15 10, i15 10, i15 10, i15 10, i15 10, i15 10, i15 10, i15 10>

llvm/test/CodeGen/X86/avg-mask.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -177,11 +177,11 @@ define <64 x i8> @avg_v64i8_maskz(<64 x i8> %a, <64 x i8> %b, i64 %mask) nounwin
177177
; AVX512F-NEXT: shrq $32, %rdi
178178
; AVX512F-NEXT: shrq $48, %rax
179179
; AVX512F-NEXT: shrl $16, %ecx
180-
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
181-
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
182-
; AVX512F-NEXT: vpavgb %ymm2, %ymm3, %ymm2
180+
; AVX512F-NEXT: vpavgb %ymm1, %ymm0, %ymm2
181+
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1
182+
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
183183
; AVX512F-NEXT: vpavgb %ymm1, %ymm0, %ymm0
184-
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
184+
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
185185
; AVX512F-NEXT: kmovw %ecx, %k2
186186
; AVX512F-NEXT: kmovw %eax, %k3
187187
; AVX512F-NEXT: kmovw %edi, %k4
@@ -364,11 +364,11 @@ define <32 x i16> @avg_v32i16_maskz(<32 x i16> %a, <32 x i16> %b, i32 %mask) nou
364364
; AVX512F: # %bb.0:
365365
; AVX512F-NEXT: kmovw %edi, %k1
366366
; AVX512F-NEXT: shrl $16, %edi
367-
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
368-
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
369-
; AVX512F-NEXT: vpavgw %ymm2, %ymm3, %ymm2
367+
; AVX512F-NEXT: vpavgw %ymm1, %ymm0, %ymm2
368+
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1
369+
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
370370
; AVX512F-NEXT: vpavgw %ymm1, %ymm0, %ymm0
371-
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
371+
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
372372
; AVX512F-NEXT: kmovw %edi, %k2
373373
; AVX512F-NEXT: vpternlogd {{.*#+}} zmm1 {%k1} {z} = -1
374374
; AVX512F-NEXT: vpmovdw %zmm1, %ymm1

llvm/test/CodeGen/X86/avx512-ext.ll

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66
define <8 x i16> @zext_8x8mem_to_8x16(ptr%i , <8 x i1> %mask) nounwind readnone {
77
; KNL-LABEL: zext_8x8mem_to_8x16:
88
; KNL: # %bb.0:
9-
; KNL-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
9+
; KNL-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
10+
; KNL-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
1011
; KNL-NEXT: vpsllw $15, %xmm0, %xmm0
1112
; KNL-NEXT: vpsraw $15, %xmm0, %xmm0
1213
; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
@@ -21,7 +22,8 @@ define <8 x i16> @zext_8x8mem_to_8x16(ptr%i , <8 x i1> %mask) nounwind readnone
2122
;
2223
; AVX512DQNOBW-LABEL: zext_8x8mem_to_8x16:
2324
; AVX512DQNOBW: # %bb.0:
24-
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
25+
; AVX512DQNOBW-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
26+
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
2527
; AVX512DQNOBW-NEXT: vpsllw $15, %xmm0, %xmm0
2628
; AVX512DQNOBW-NEXT: vpsraw $15, %xmm0, %xmm0
2729
; AVX512DQNOBW-NEXT: vpand %xmm1, %xmm0, %xmm0
@@ -35,7 +37,8 @@ define <8 x i16> @zext_8x8mem_to_8x16(ptr%i , <8 x i1> %mask) nounwind readnone
3537
define <8 x i16> @sext_8x8mem_to_8x16(ptr%i , <8 x i1> %mask) nounwind readnone {
3638
; KNL-LABEL: sext_8x8mem_to_8x16:
3739
; KNL: # %bb.0:
38-
; KNL-NEXT: vpmovsxbw (%rdi), %xmm1
40+
; KNL-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
41+
; KNL-NEXT: vpmovsxbw %xmm1, %xmm1
3942
; KNL-NEXT: vpsllw $15, %xmm0, %xmm0
4043
; KNL-NEXT: vpsraw $15, %xmm0, %xmm0
4144
; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
@@ -50,7 +53,8 @@ define <8 x i16> @sext_8x8mem_to_8x16(ptr%i , <8 x i1> %mask) nounwind readnone
5053
;
5154
; AVX512DQNOBW-LABEL: sext_8x8mem_to_8x16:
5255
; AVX512DQNOBW: # %bb.0:
53-
; AVX512DQNOBW-NEXT: vpmovsxbw (%rdi), %xmm1
56+
; AVX512DQNOBW-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
57+
; AVX512DQNOBW-NEXT: vpmovsxbw %xmm1, %xmm1
5458
; AVX512DQNOBW-NEXT: vpsllw $15, %xmm0, %xmm0
5559
; AVX512DQNOBW-NEXT: vpsraw $15, %xmm0, %xmm0
5660
; AVX512DQNOBW-NEXT: vpand %xmm1, %xmm0, %xmm0
@@ -208,8 +212,10 @@ define <32 x i16> @zext_32x8mem_to_32x16(ptr%i , <32 x i1> %mask) nounwind readn
208212
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
209213
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
210214
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
211-
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
212-
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
215+
; KNL-NEXT: vmovdqu (%rdi), %ymm2
216+
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
217+
; KNL-NEXT: vextracti128 $1, %ymm2, %xmm2
218+
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
213219
; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
214220
; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
215221
; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
@@ -231,8 +237,10 @@ define <32 x i16> @zext_32x8mem_to_32x16(ptr%i , <32 x i1> %mask) nounwind readn
231237
; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm1
232238
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
233239
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
234-
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
235-
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
240+
; AVX512DQNOBW-NEXT: vmovdqu (%rdi), %ymm2
241+
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
242+
; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm2, %xmm2
243+
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
236244
; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
237245
; AVX512DQNOBW-NEXT: vpsllw $15, %ymm0, %ymm0
238246
; AVX512DQNOBW-NEXT: vpsraw $15, %ymm0, %ymm0
@@ -253,8 +261,10 @@ define <32 x i16> @sext_32x8mem_to_32x16(ptr%i , <32 x i1> %mask) nounwind readn
253261
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
254262
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
255263
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
256-
; KNL-NEXT: vpmovsxbw 16(%rdi), %ymm2
257-
; KNL-NEXT: vpmovsxbw (%rdi), %ymm3
264+
; KNL-NEXT: vmovdqu (%rdi), %ymm2
265+
; KNL-NEXT: vpmovsxbw %xmm2, %ymm3
266+
; KNL-NEXT: vextracti128 $1, %ymm2, %xmm2
267+
; KNL-NEXT: vpmovsxbw %xmm2, %ymm2
258268
; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
259269
; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
260270
; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
@@ -276,8 +286,10 @@ define <32 x i16> @sext_32x8mem_to_32x16(ptr%i , <32 x i1> %mask) nounwind readn
276286
; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm1
277287
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
278288
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
279-
; AVX512DQNOBW-NEXT: vpmovsxbw 16(%rdi), %ymm2
280-
; AVX512DQNOBW-NEXT: vpmovsxbw (%rdi), %ymm3
289+
; AVX512DQNOBW-NEXT: vmovdqu (%rdi), %ymm2
290+
; AVX512DQNOBW-NEXT: vpmovsxbw %xmm2, %ymm3
291+
; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm2, %xmm2
292+
; AVX512DQNOBW-NEXT: vpmovsxbw %xmm2, %ymm2
281293
; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
282294
; AVX512DQNOBW-NEXT: vpsllw $15, %ymm0, %ymm0
283295
; AVX512DQNOBW-NEXT: vpsraw $15, %ymm0, %ymm0

llvm/test/CodeGen/X86/pr78897.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ define <16 x i8> @produceShuffleVectorForByte(i8 zeroext %0) nounwind {
2222
; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
2323
; X86-SSE2-NEXT: pxor %xmm0, %xmm0
2424
; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
25-
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [17,17,17,17,17,17,17,17,u,u,u,u,u,u,u,u]
25+
; X86-SSE2-NEXT: movq {{.*#+}} xmm1 = [17,17,17,17,17,17,17,17,0,0,0,0,0,0,0,0]
2626
; X86-SSE2-NEXT: pand %xmm0, %xmm1
2727
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1]
2828
; X86-SSE2-NEXT: movd %xmm2, %esi

llvm/test/CodeGen/X86/sqrt-fastmath.ll

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -410,34 +410,34 @@ define <4 x float> @v4f32_estimate(<4 x float> %x) #1 {
410410
define <4 x float> @v4f32_estimate2(<4 x float> %x) #5 {
411411
; SSE-LABEL: v4f32_estimate2:
412412
; SSE: # %bb.0:
413-
; SSE-NEXT: rsqrtps %xmm0, %xmm2
414-
; SSE-NEXT: mulps %xmm0, %xmm2
415-
; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
416-
; SSE-NEXT: movaps {{.*#+}} xmm1 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38]
417-
; SSE-NEXT: cmpleps %xmm0, %xmm1
418-
; SSE-NEXT: andps %xmm2, %xmm1
419-
; SSE-NEXT: movaps %xmm1, %xmm0
413+
; SSE-NEXT: movaps {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
414+
; SSE-NEXT: andps %xmm0, %xmm1
415+
; SSE-NEXT: movaps {{.*#+}} xmm2 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38]
416+
; SSE-NEXT: cmpleps %xmm1, %xmm2
417+
; SSE-NEXT: rsqrtps %xmm0, %xmm1
418+
; SSE-NEXT: mulps %xmm1, %xmm0
419+
; SSE-NEXT: andps %xmm2, %xmm0
420420
; SSE-NEXT: retq
421421
;
422422
; AVX1-LABEL: v4f32_estimate2:
423423
; AVX1: # %bb.0:
424-
; AVX1-NEXT: vrsqrtps %xmm0, %xmm1
425-
; AVX1-NEXT: vmulps %xmm1, %xmm0, %xmm1
426-
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
424+
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
427425
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38]
428-
; AVX1-NEXT: vcmpleps %xmm0, %xmm2, %xmm0
429-
; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
426+
; AVX1-NEXT: vcmpleps %xmm1, %xmm2, %xmm1
427+
; AVX1-NEXT: vrsqrtps %xmm0, %xmm2
428+
; AVX1-NEXT: vmulps %xmm2, %xmm0, %xmm0
429+
; AVX1-NEXT: vandps %xmm0, %xmm1, %xmm0
430430
; AVX1-NEXT: retq
431431
;
432432
; AVX512-LABEL: v4f32_estimate2:
433433
; AVX512: # %bb.0:
434-
; AVX512-NEXT: vrsqrtps %xmm0, %xmm1
435-
; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm1
436-
; AVX512-NEXT: vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
437-
; AVX512-NEXT: vandps %xmm2, %xmm0, %xmm0
434+
; AVX512-NEXT: vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
435+
; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm1
438436
; AVX512-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38]
439-
; AVX512-NEXT: vcmpleps %xmm0, %xmm2, %xmm0
440-
; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0
437+
; AVX512-NEXT: vcmpleps %xmm1, %xmm2, %xmm1
438+
; AVX512-NEXT: vrsqrtps %xmm0, %xmm2
439+
; AVX512-NEXT: vmulps %xmm2, %xmm0, %xmm0
440+
; AVX512-NEXT: vandps %xmm0, %xmm1, %xmm0
441441
; AVX512-NEXT: retq
442442
%sqrt = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %x)
443443
ret <4 x float> %sqrt

llvm/test/CodeGen/X86/ushl_sat_vec.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,7 @@ define <8 x i16> @vec_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind {
281281
; X64-AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
282282
; X64-AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
283283
; X64-AVX2-NEXT: vpsllvd %ymm1, %ymm2, %ymm2
284-
; X64-AVX2-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
284+
; X64-AVX2-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
285285
; X64-AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
286286
; X64-AVX2-NEXT: vpmovzxwd {{.*#+}} ymm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
287287
; X64-AVX2-NEXT: vpsrlvd %ymm1, %ymm3, %ymm1

0 commit comments

Comments
 (0)