diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 7b46b109de3b0..a23a4b67df542 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -52661,6 +52661,29 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
                         St->getMemOperand()->getFlags());
   }
 
+  // Convert scalar fabs/fneg load-store to integer equivalents.
+  if ((VT == MVT::f32 || VT == MVT::f64) &&
+      (StoredVal.getOpcode() == ISD::FABS ||
+       StoredVal.getOpcode() == ISD::FNEG) &&
+      ISD::isNormalLoad(StoredVal.getOperand(0).getNode()) &&
+      StoredVal.hasOneUse() && StoredVal.getOperand(0).hasOneUse()) {
+    MVT IntVT = VT.getSimpleVT().changeTypeToInteger();
+    if (TLI.isTypeLegal(IntVT)) {
+      APInt SignMask = APInt::getSignMask(VT.getScalarSizeInBits());
+      unsigned SignOp = ISD::XOR;
+      if (StoredVal.getOpcode() == ISD::FABS) {
+        SignMask = ~SignMask;
+        SignOp = ISD::AND;
+      }
+      SDValue LogicOp = DAG.getNode(
+          SignOp, dl, IntVT, DAG.getBitcast(IntVT, StoredVal.getOperand(0)),
+          DAG.getConstant(SignMask, dl, IntVT));
+      return DAG.getStore(St->getChain(), dl, LogicOp, St->getBasePtr(),
+                          St->getPointerInfo(), St->getOriginalAlign(),
+                          St->getMemOperand()->getFlags());
+    }
+  }
+
   // If we are saving a 32-byte vector and 32-byte stores are slow, such as on
   // Sandy Bridge, perform two 16-byte stores.
   unsigned Fast;
diff --git a/llvm/test/CodeGen/X86/combine-fabs.ll b/llvm/test/CodeGen/X86/combine-fabs.ll
index d337c7693ff7d..b9aad9075261b 100644
--- a/llvm/test/CodeGen/X86/combine-fabs.ll
+++ b/llvm/test/CodeGen/X86/combine-fabs.ll
@@ -135,20 +135,16 @@ define <4 x float> @combine_vec_fabs_fcopysign(<4 x float> %a, <4 x float> %b) {
   ret <4 x float> %2
 }
 
-; TODO: store(fabs(load())) - convert scalar to integer
+; store(fabs(load())) - convert scalar to integer
 define void @combine_fabs_int_rmw_f64(ptr %ptr) {
 ; SSE-LABEL: combine_fabs_int_rmw_f64:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE-NEXT:    movlps %xmm0, (%rdi)
+; SSE-NEXT:    andb $127, 7(%rdi)
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_fabs_int_rmw_f64:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX-NEXT:    vmovlps %xmm0, (%rdi)
+; AVX-NEXT:    andb $127, 7(%rdi)
 ; AVX-NEXT:    retq
   %1 = load double, ptr %ptr
   %2 = call double @llvm.fabs.f64(double %1)
@@ -159,17 +155,16 @@ define void @combine_fabs_int_rmw_f64(ptr %ptr) {
 define void @combine_fabs_int_f32(ptr %src, ptr %dst) {
 ; SSE-LABEL: combine_fabs_int_f32:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE-NEXT:    movss %xmm0, (%rsi)
+; SSE-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
+; SSE-NEXT:    andl (%rdi), %eax
+; SSE-NEXT:    movl %eax, (%rsi)
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_fabs_int_f32:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
-; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vmovss %xmm0, (%rsi)
+; AVX-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
+; AVX-NEXT:    andl (%rdi), %eax
+; AVX-NEXT:    movl %eax, (%rsi)
 ; AVX-NEXT:    retq
   %1 = load float, ptr %src
   %2 = call float @llvm.fabs.f32(float %1)
diff --git a/llvm/test/CodeGen/X86/combine-fneg.ll b/llvm/test/CodeGen/X86/combine-fneg.ll
index e8e3465c99383..855b64229a9c3 100644
--- a/llvm/test/CodeGen/X86/combine-fneg.ll
+++ b/llvm/test/CodeGen/X86/combine-fneg.ll
@@ -205,21 +205,17 @@ define <4 x float> @fneg(<4 x float> %Q) nounwind {
   ret <4 x float> %tmp
 }
 
-; TODO: store(fneg(load())) - convert scalar to integer
+; store(fneg(load())) - convert scalar to integer
 define void @fneg_int_rmw_f32(ptr %ptr) {
 ; X86-SSE-LABEL: fneg_int_rmw_f32:
 ; X86-SSE:       # %bb.0:
 ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-SSE-NEXT:    xorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE-NEXT:    movss %xmm0, (%eax)
+; X86-SSE-NEXT:    xorb $-128, 3(%eax)
 ; X86-SSE-NEXT:    retl
 ;
 ; X64-SSE-LABEL: fneg_int_rmw_f32:
 ; X64-SSE:       # %bb.0:
-; X64-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-SSE-NEXT:    xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-SSE-NEXT:    movss %xmm0, (%rdi)
+; X64-SSE-NEXT:    xorb $-128, 3(%rdi)
 ; X64-SSE-NEXT:    retq
   %1 = load float, ptr %ptr
   %2 = fneg float %1
@@ -246,19 +242,12 @@ define void @fneg_int_f64(ptr %src, ptr %dst) {
 ; X86-SSE2-NEXT:    movlps %xmm0, (%eax)
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-SSE1-LABEL: fneg_int_f64:
-; X64-SSE1:       # %bb.0:
-; X64-SSE1-NEXT:    fldl (%rdi)
-; X64-SSE1-NEXT:    fchs
-; X64-SSE1-NEXT:    fstpl (%rsi)
-; X64-SSE1-NEXT:    retq
-;
-; X64-SSE2-LABEL: fneg_int_f64:
-; X64-SSE2:       # %bb.0:
-; X64-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; X64-SSE2-NEXT:    xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-SSE2-NEXT:    movlps %xmm0, (%rsi)
-; X64-SSE2-NEXT:    retq
+; X64-SSE-LABEL: fneg_int_f64:
+; X64-SSE:       # %bb.0:
+; X64-SSE-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; X64-SSE-NEXT:    xorq (%rdi), %rax
+; X64-SSE-NEXT:    movq %rax, (%rsi)
+; X64-SSE-NEXT:    retq
   %1 = load double, ptr %src
   %2 = fneg double %1
   store double %2, ptr %dst
diff --git a/llvm/test/CodeGen/X86/fsxor-alignment.ll b/llvm/test/CodeGen/X86/fsxor-alignment.ll
index 1b9d6c91ad2b2..6fa4a310956dc 100644
--- a/llvm/test/CodeGen/X86/fsxor-alignment.ll
+++ b/llvm/test/CodeGen/X86/fsxor-alignment.ll
@@ -8,15 +8,16 @@
 define void @foo(ptr %p, ptr %q, float %s, float %y) nounwind {
 ; CHECK-LABEL: foo:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushl %esi
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT:    movaps {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
-; CHECK-NEXT:    xorps %xmm1, %xmm0
-; CHECK-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; CHECK-NEXT:    xorps %xmm1, %xmm2
-; CHECK-NEXT:    movss %xmm0, (%ecx)
-; CHECK-NEXT:    movss %xmm2, (%eax)
+; CHECK-NEXT:    movl $-2147483648, %edx # imm = 0x80000000
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; CHECK-NEXT:    xorl %edx, %esi
+; CHECK-NEXT:    movl %esi, (%ecx)
+; CHECK-NEXT:    xorl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT:    movl %edx, (%eax)
+; CHECK-NEXT:    popl %esi
 ; CHECK-NEXT:    retl
   %ss = fsub float -0.0, %s
   %yy = fsub float -0.0, %y
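For context, the fold rests on the usual IEEE-754 sign-bit identities: fneg flips the sign bit and fabs clears it, so once the stored value is bitcast to an integer the load/op/store chain can stay in GPRs, and for the read-modify-write cases it further folds into a single byte-wide `andb $127` / `xorb $-128` on the byte holding the sign bit, as the regenerated checks above show. A minimal standalone C++ sketch of that identity, separate from the patch and with illustrative names only:

```cpp
#include <cstdint>
#include <cstring>

// Illustration only (not from the patch): for IEEE-754 floats, fneg is an
// XOR of the sign bit and fabs is an AND with ~SignMask, so
// store(fneg/fabs(load x)) can be performed entirely with integer ops.
float fnegViaInt(float F) {
  std::uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits)); // bitcast f32 -> i32
  Bits ^= 0x80000000u;                  // ISD::FNEG -> ISD::XOR with SignMask
  std::memcpy(&F, &Bits, sizeof(Bits));
  return F;
}

float fabsViaInt(float F) {
  std::uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits)); // bitcast f32 -> i32
  Bits &= 0x7FFFFFFFu;                  // ISD::FABS -> ISD::AND with ~SignMask
  std::memcpy(&F, &Bits, sizeof(Bits));
  return F;
}
```

The one-use checks in the combine keep this to pure load/op/store chains, so a value that is also consumed as a float elsewhere still goes through the existing XMM constant-pool lowering.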