Skip to content

Commit c57ca00

Browse files
committed
[X86][APX] Combine (X86Sub 0, AND(X, Y)) to (X86And X, Y) for CLOAD/CSTORE
https://godbolt.org/z/TsWochrbf
1 parent 0a17427 commit c57ca00

File tree

2 files changed

+50
-5
lines changed

2 files changed

+50
-5
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -57814,16 +57814,25 @@ static SDValue combineX86CloadCstore(SDNode *N, SelectionDAG &DAG) {
5781457814
if (Sub.getOpcode() != X86ISD::SUB)
5781557815
return SDValue();
5781657816

57817-
SDValue SetCC = Sub.getOperand(1);
57817+
SDValue Op1 = Sub.getOperand(1);
5781857818

57819-
if (!X86::isZeroNode(Sub.getOperand(0)) || SetCC.getOpcode() != X86ISD::SETCC)
57819+
if (!X86::isZeroNode(Sub.getOperand(0)))
5782057820
return SDValue();
5782157821

57822+
SDLoc DL(N);
5782257823
SmallVector<SDValue, 5> Ops(N->op_values());
57823-
Ops[3] = SetCC.getOperand(0);
57824-
Ops[4] = SetCC.getOperand(1);
57824+
if (Op1.getOpcode() == X86ISD::SETCC) {
57825+
Ops[3] = Op1.getOperand(0);
57826+
Ops[4] = Op1.getOperand(1);
57827+
} else if (Op1.getOpcode() == ISD::AND && Sub.getValue(0).use_empty()) {
57828+
Ops[4] = DAG.getNode(X86ISD::AND, DL, Sub->getVTList(), Op1.getOperand(0),
57829+
Op1.getOperand(1))
57830+
.getValue(1);
57831+
} else {
57832+
return SDValue();
57833+
}
5782557834

57826-
return DAG.getMemIntrinsicNode(N->getOpcode(), SDLoc(N), N->getVTList(), Ops,
57835+
return DAG.getMemIntrinsicNode(N->getOpcode(), DL, N->getVTList(), Ops,
5782757836
cast<MemSDNode>(N)->getMemoryVT(),
5782857837
cast<MemSDNode>(N)->getMemOperand());
5782957838
}

llvm/test/CodeGen/X86/apx/cf.ll

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,3 +158,39 @@ entry:
158158
tail call void @llvm.masked.store.v1i16.p0(<1 x i16> %5, ptr %p, i32 2, <1 x i1> %1)
159159
ret void
160160
}
161+
162+
; Test: a <1 x i16> masked load from %b, zero-extended to i32 and written to
; %p by a <1 x i32> masked store. Both intrinsics use the same mask, which is
; %cond bitcast to <1 x i1>.
; The expected assembly below has a single `andb $1, %dil` followed by two
; cfcmovne instructions (conditional load, then conditional store) with a
; movzwl in between; no separate flag-producing instruction appears before
; the store.
define void @load_zext(i1 %cond, ptr %b, ptr %p) {
163+
; CHECK-LABEL: load_zext:
164+
; CHECK: # %bb.0: # %entry
165+
; CHECK-NEXT: andb $1, %dil
166+
; CHECK-NEXT: cfcmovnew (%rsi), %ax
167+
; CHECK-NEXT: movzwl %ax, %eax
168+
; CHECK-NEXT: cfcmovnel %eax, (%rdx)
169+
; CHECK-NEXT: retq
170+
entry:
171+
%0 = bitcast i1 %cond to <1 x i1>
172+
%1 = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr %b, i32 2, <1 x i1> %0, <1 x i16> poison)
173+
%2 = bitcast <1 x i16> %1 to i16
174+
%zext = zext i16 %2 to i32
175+
%3 = bitcast i32 %zext to <1 x i32>
176+
call void @llvm.masked.store.v1i32.p0(<1 x i32> %3, ptr %p, i32 4, <1 x i1> %0)
177+
ret void
178+
}
179+
180+
; Test: a <1 x i32> masked load from %b, sign-extended to i64 and written to
; %p by a <1 x i64> masked store. Both intrinsics use the same mask, which is
; %cond bitcast to <1 x i1>.
; The expected assembly below has a single `andb $1, %dil` followed by two
; cfcmovne instructions (conditional load, then conditional store) with a
; cltq in between.
; NOTE(review): the extended value is named %zext although it is produced by
; `sext`; the name looks carried over from load_zext.
; NOTE(review): the i32 operands 2 (load) and 4 (store) are presumably the
; alignment arguments and are smaller than the natural alignment of i32/i64;
; confirm whether that is intentional.
define void @load_sext(i1 %cond, ptr %b, ptr %p) {
181+
; CHECK-LABEL: load_sext:
182+
; CHECK: # %bb.0: # %entry
183+
; CHECK-NEXT: andb $1, %dil
184+
; CHECK-NEXT: cfcmovnel (%rsi), %eax
185+
; CHECK-NEXT: cltq
186+
; CHECK-NEXT: cfcmovneq %rax, (%rdx)
187+
; CHECK-NEXT: retq
188+
entry:
189+
%0 = bitcast i1 %cond to <1 x i1>
190+
%1 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr %b, i32 2, <1 x i1> %0, <1 x i32> poison)
191+
%2 = bitcast <1 x i32> %1 to i32
192+
%zext = sext i32 %2 to i64
193+
%3 = bitcast i64 %zext to <1 x i64>
194+
call void @llvm.masked.store.v1i64.p0(<1 x i64> %3, ptr %p, i32 4, <1 x i1> %0)
195+
ret void
196+
}

0 commit comments

Comments
 (0)