From 4c464b58a936bfbb23415aad24b397467b9bf7da Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 3 Feb 2025 11:53:29 +0000 Subject: [PATCH] [DAG] getNode - convert scalar i1 arithmetic calls to boolean instructions We already do this for vector vXi1 types - this patch removes the vector constraint to handle it for all bool types --- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 23 ++++++------- llvm/test/CodeGen/AMDGPU/add_i1.ll | 12 +++---- llvm/test/CodeGen/AMDGPU/mul.ll | 34 +++++++++---------- llvm/test/CodeGen/AMDGPU/sub_i1.ll | 12 +++---- .../CodeGen/LoongArch/ir-instruction/add.ll | 8 ++--- .../CodeGen/LoongArch/ir-instruction/mul.ll | 4 +-- .../CodeGen/LoongArch/ir-instruction/sub.ll | 4 +-- llvm/test/CodeGen/Mips/llvm-ir/add.ll | 30 +++++----------- llvm/test/CodeGen/Mips/llvm-ir/mul.ll | 34 ++++++------------- llvm/test/CodeGen/Mips/llvm-ir/sub.ll | 9 ++--- llvm/test/CodeGen/NVPTX/boolean-patterns.ll | 2 +- .../test/CodeGen/X86/avx512-regcall-NoMask.ll | 18 +++++----- llvm/test/CodeGen/X86/bitcast-vector-bool.ll | 6 ++-- llvm/test/CodeGen/X86/combine-add.ll | 4 +-- llvm/test/CodeGen/X86/fast-isel-select.ll | 2 +- llvm/test/CodeGen/X86/gpr-to-mask.ll | 4 +-- llvm/test/CodeGen/X86/setcc-combine.ll | 11 ++---- llvm/test/CodeGen/X86/sse-regcall.ll | 18 +++++----- llvm/test/CodeGen/X86/sse-regcall4.ll | 18 +++++----- llvm/test/CodeGen/X86/subcarry.ll | 11 ++---- 20 files changed, 109 insertions(+), 155 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 8f50a14da25a8..16c3b295426c6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -7297,15 +7297,15 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, // it's worth handling here. if (N2CV && N2CV->isZero()) return N1; - if ((Opcode == ISD::ADD || Opcode == ISD::SUB) && VT.isVector() && - VT.getVectorElementType() == MVT::i1) + if ((Opcode == ISD::ADD || Opcode == ISD::SUB) && + VT.getScalarType() == MVT::i1) return getNode(ISD::XOR, DL, VT, N1, N2); break; case ISD::MUL: assert(VT.isInteger() && "This operator does not apply to FP types!"); assert(N1.getValueType() == N2.getValueType() && N1.getValueType() == VT && "Binary operator types must match!"); - if (VT.isVector() && VT.getVectorElementType() == MVT::i1) + if (VT.getScalarType() == MVT::i1) return getNode(ISD::AND, DL, VT, N1, N2); if (N2C && (N1.getOpcode() == ISD::VSCALE) && Flags.hasNoSignedWrap()) { const APInt &MulImm = N1->getConstantOperandAPInt(0); @@ -7326,7 +7326,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, assert(VT.isInteger() && "This operator does not apply to FP types!"); assert(N1.getValueType() == N2.getValueType() && N1.getValueType() == VT && "Binary operator types must match!"); - if (VT.isVector() && VT.getVectorElementType() == MVT::i1) { + if (VT.getScalarType() == MVT::i1) { // fold (add_sat x, y) -> (or x, y) for bool types. 
if (Opcode == ISD::SADDSAT || Opcode == ISD::UADDSAT) return getNode(ISD::OR, DL, VT, N1, N2); @@ -7359,7 +7359,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, assert(VT.isInteger() && "This operator does not apply to FP types!"); assert(N1.getValueType() == N2.getValueType() && N1.getValueType() == VT && "Binary operator types must match!"); - if (VT.isVector() && VT.getVectorElementType() == MVT::i1) + if (VT.getScalarType() == MVT::i1) return getNode(ISD::XOR, DL, VT, N1, N2); break; case ISD::SMIN: @@ -7367,7 +7367,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, assert(VT.isInteger() && "This operator does not apply to FP types!"); assert(N1.getValueType() == N2.getValueType() && N1.getValueType() == VT && "Binary operator types must match!"); - if (VT.isVector() && VT.getVectorElementType() == MVT::i1) + if (VT.getScalarType() == MVT::i1) return getNode(ISD::OR, DL, VT, N1, N2); break; case ISD::SMAX: @@ -7375,7 +7375,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, assert(VT.isInteger() && "This operator does not apply to FP types!"); assert(N1.getValueType() == N2.getValueType() && N1.getValueType() == VT && "Binary operator types must match!"); - if (VT.isVector() && VT.getVectorElementType() == MVT::i1) + if (VT.getScalarType() == MVT::i1) return getNode(ISD::AND, DL, VT, N1, N2); break; case ISD::FADD: @@ -10399,12 +10399,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::VP_ADD: case ISD::VP_SUB: // If it is VP_ADD/VP_SUB mask operation then turn it to VP_XOR - if (VT.isVector() && VT.getVectorElementType() == MVT::i1) + if (VT.getScalarType() == MVT::i1) Opcode = ISD::VP_XOR; break; case ISD::VP_MUL: // If it is VP_MUL mask operation then turn it to VP_AND - if (VT.isVector() && VT.getVectorElementType() == MVT::i1) + if (VT.getScalarType() == MVT::i1) Opcode = ISD::VP_AND; break; case ISD::VP_REDUCE_MUL: @@ -10509,9 +10509,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, return getNode(ISD::MERGE_VALUES, DL, VTList, {N1, ZeroOverFlow}, Flags); } - if (VTList.VTs[0].isVector() && - VTList.VTs[0].getVectorElementType() == MVT::i1 && - VTList.VTs[1].getVectorElementType() == MVT::i1) { + if (VTList.VTs[0].getScalarType() == MVT::i1 && + VTList.VTs[1].getScalarType() == MVT::i1) { SDValue F1 = getFreeze(N1); SDValue F2 = getFreeze(N2); // {vXi1,vXi1} (u/s)addo(vXi1 x, vXi1y) -> {xor(x,y),and(x,y)} diff --git a/llvm/test/CodeGen/AMDGPU/add_i1.ll b/llvm/test/CodeGen/AMDGPU/add_i1.ll index e9e3fa765b52f..ff1a3ee38be1d 100644 --- a/llvm/test/CodeGen/AMDGPU/add_i1.ll +++ b/llvm/test/CodeGen/AMDGPU/add_i1.ll @@ -3,8 +3,8 @@ ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s ; GCN-LABEL: {{^}}add_var_var_i1: -; GFX9: s_xor_b64 -; GFX10: s_xor_b32 +; GFX9: v_xor_b32_e32 +; GFX10: v_xor_b32_e32 define amdgpu_kernel void @add_var_var_i1(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) { %a = load volatile i1, ptr addrspace(1) %in0 %b = load volatile i1, ptr addrspace(1) %in1 @@ -14,8 +14,8 @@ define amdgpu_kernel void @add_var_var_i1(ptr addrspace(1) %out, ptr addrspace(1 } ; GCN-LABEL: {{^}}add_var_imm_i1: -; GFX9: s_not_b64 -; GFX10: s_not_b32 +; GFX9: s_xor_b64 +; GFX10: s_xor_b32 define amdgpu_kernel void @add_var_imm_i1(ptr addrspace(1) %out, ptr addrspace(1) %in) { %a = load volatile i1, ptr addrspace(1) %in %add = add i1 %a, 1 @@ -25,8 +25,8 @@ 
define amdgpu_kernel void @add_var_imm_i1(ptr addrspace(1) %out, ptr addrspace(1 ; GCN-LABEL: {{^}}add_i1_cf: ; GCN: ; %endif -; GFX9: s_not_b64 -; GFX10: s_not_b32 +; GFX9: s_xor_b64 +; GFX10: s_xor_b32 define amdgpu_kernel void @add_i1_cf(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/AMDGPU/mul.ll b/llvm/test/CodeGen/AMDGPU/mul.ll index 2003cb163a985..9b4693f61147a 100644 --- a/llvm/test/CodeGen/AMDGPU/mul.ll +++ b/llvm/test/CodeGen/AMDGPU/mul.ll @@ -1459,8 +1459,8 @@ define amdgpu_kernel void @s_mul_i1(ptr addrspace(1) %out, [8 x i32], i1 %a, [8 ; SI-NEXT: s_mov_b32 s3, 0xf000 ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_mul_i32 s6, s6, s7 -; SI-NEXT: s_and_b32 s4, s6, 1 +; SI-NEXT: s_and_b32 s4, s6, s7 +; SI-NEXT: s_and_b32 s4, s4, 1 ; SI-NEXT: v_mov_b32_e32 v0, s4 ; SI-NEXT: buffer_store_byte v0, off, s[0:3], 0 ; SI-NEXT: s_endpgm @@ -1473,8 +1473,8 @@ define amdgpu_kernel void @s_mul_i1(ptr addrspace(1) %out, [8 x i32], i1 %a, [8 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_mul_i32 s6, s6, s7 -; VI-NEXT: s_and_b32 s4, s6, 1 +; VI-NEXT: s_and_b32 s4, s6, s7 +; VI-NEXT: s_and_b32 s4, s4, 1 ; VI-NEXT: v_mov_b32_e32 v0, s4 ; VI-NEXT: buffer_store_byte v0, off, s[0:3], 0 ; VI-NEXT: s_endpgm @@ -1487,8 +1487,8 @@ define amdgpu_kernel void @s_mul_i1(ptr addrspace(1) %out, [8 x i32], i1 %a, [8 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_mul_i32 s6, s6, s7 -; GFX9-NEXT: s_and_b32 s4, s6, 1 +; GFX9-NEXT: s_and_b32 s4, s6, s7 +; GFX9-NEXT: s_and_b32 s4, s4, 1 ; GFX9-NEXT: v_mov_b32_e32 v0, s4 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], 0 ; GFX9-NEXT: s_endpgm @@ -1500,7 +1500,7 @@ define amdgpu_kernel void @s_mul_i1(ptr addrspace(1) %out, [8 x i32], i1 %a, [8 ; GFX10-NEXT: s_load_dword s3, s[4:5], 0x70 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_mul_i32 s2, s2, s3 +; GFX10-NEXT: s_and_b32 s2, s2, s3 ; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; GFX10-NEXT: s_and_b32 s2, s2, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, s2 @@ -1515,7 +1515,7 @@ define amdgpu_kernel void @s_mul_i1(ptr addrspace(1) %out, [8 x i32], i1 %a, [8 ; GFX11-NEXT: s_load_b32 s3, s[4:5], 0x70 ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: s_mul_i32 s2, s2, s3 +; GFX11-NEXT: s_and_b32 s2, s2, s3 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_and_b32 s2, s2, 1 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) @@ -1531,7 +1531,7 @@ define amdgpu_kernel void @s_mul_i1(ptr addrspace(1) %out, [8 x i32], i1 %a, [8 ; GFX12-NEXT: s_load_b32 s3, s[4:5], 0x70 ; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: s_mul_i32 s2, s2, s3 +; GFX12-NEXT: s_and_b32 s2, s2, s3 ; GFX12-NEXT: s_mov_b32 s3, 0x31016000 ; GFX12-NEXT: s_and_b32 s2, s2, 1 ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) @@ -1555,7 +1555,7 @@ define amdgpu_kernel void @s_mul_i1(ptr addrspace(1) %out, [8 x i32], i1 %a, [8 ; EG-NEXT: MOV * T0.X, 0.0, ; EG-NEXT: ALU clause starting at 11: ; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x, -; EG-NEXT: MULLO_INT * T0.X, T1.X, T0.X, +; EG-NEXT: AND_INT * T1.W, T1.X, T0.X, ; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) ; EG-NEXT: AND_INT T1.W, PS, 1, ; EG-NEXT: LSHL * T0.W, PV.W, literal.x, @@ -1589,7 +1589,7 @@ define amdgpu_kernel void 
@v_mul_i1(ptr addrspace(1) %out, ptr addrspace(1) %in) ; SI-NEXT: s_mov_b32 s4, s0 ; SI-NEXT: s_mov_b32 s5, s1 ; SI-NEXT: s_waitcnt vmcnt(0) -; SI-NEXT: v_mul_lo_u32 v0, v0, v1 +; SI-NEXT: v_and_b32_e32 v0, v0, v1 ; SI-NEXT: v_and_b32_e32 v0, 1, v0 ; SI-NEXT: buffer_store_byte v0, off, s[4:7], 0 ; SI-NEXT: s_endpgm @@ -1609,7 +1609,7 @@ define amdgpu_kernel void @v_mul_i1(ptr addrspace(1) %out, ptr addrspace(1) %in) ; VI-NEXT: s_mov_b32 s4, s0 ; VI-NEXT: s_mov_b32 s5, s1 ; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_mul_lo_u32 v0, v0, v1 +; VI-NEXT: v_and_b32_e32 v0, v0, v1 ; VI-NEXT: v_and_b32_e32 v0, 1, v0 ; VI-NEXT: buffer_store_byte v0, off, s[4:7], 0 ; VI-NEXT: s_endpgm @@ -1629,7 +1629,7 @@ define amdgpu_kernel void @v_mul_i1(ptr addrspace(1) %out, ptr addrspace(1) %in) ; GFX9-NEXT: s_mov_b32 s4, s0 ; GFX9-NEXT: s_mov_b32 s5, s1 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_mul_lo_u32 v0, v0, v1 +; GFX9-NEXT: v_and_b32_e32 v0, v0, v1 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: buffer_store_byte v0, off, s[4:7], 0 ; GFX9-NEXT: s_endpgm @@ -1650,7 +1650,7 @@ define amdgpu_kernel void @v_mul_i1(ptr addrspace(1) %out, ptr addrspace(1) %in) ; GFX10-NEXT: s_mov_b32 s4, s0 ; GFX10-NEXT: s_mov_b32 s5, s1 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_mul_lo_u32 v0, v0, v1 +; GFX10-NEXT: v_and_b32_e32 v0, v0, v1 ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX10-NEXT: buffer_store_byte v0, off, s[4:7], 0 ; GFX10-NEXT: s_endpgm @@ -1671,7 +1671,7 @@ define amdgpu_kernel void @v_mul_i1(ptr addrspace(1) %out, ptr addrspace(1) %in) ; GFX11-NEXT: s_mov_b32 s4, s0 ; GFX11-NEXT: s_mov_b32 s5, s1 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_mul_lo_u32 v0, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, v0, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX11-NEXT: buffer_store_b8 v0, off, s[4:7], 0 @@ -1693,7 +1693,7 @@ define amdgpu_kernel void @v_mul_i1(ptr addrspace(1) %out, ptr addrspace(1) %in) ; GFX12-NEXT: s_mov_b32 s4, s0 ; GFX12-NEXT: s_mov_b32 s5, s1 ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: v_mul_lo_u32 v0, v0, v1 +; GFX12-NEXT: v_and_b32_e32 v0, v0, v1 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX12-NEXT: buffer_store_b8 v0, off, s[4:7], null @@ -1714,7 +1714,7 @@ define amdgpu_kernel void @v_mul_i1(ptr addrspace(1) %out, ptr addrspace(1) %in) ; EG-NEXT: MOV * T0.X, KC0[2].Z, ; EG-NEXT: ALU clause starting at 11: ; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x, -; EG-NEXT: MULLO_INT * T0.X, T0.X, T1.X, +; EG-NEXT: AND_INT * T1.W, T0.X, T1.X, ; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) ; EG-NEXT: AND_INT T1.W, PS, 1, ; EG-NEXT: LSHL * T0.W, PV.W, literal.x, diff --git a/llvm/test/CodeGen/AMDGPU/sub_i1.ll b/llvm/test/CodeGen/AMDGPU/sub_i1.ll index a6ab1bd9e19f1..19d012fc074f8 100644 --- a/llvm/test/CodeGen/AMDGPU/sub_i1.ll +++ b/llvm/test/CodeGen/AMDGPU/sub_i1.ll @@ -4,8 +4,8 @@ ; GCN-LABEL: {{^}}sub_var_var_i1: -; WAVE32: s_xor_b32 -; WAVE64: s_xor_b64 +; WAVE32: v_xor_b32_e32 +; WAVE64: v_xor_b32_e32 define amdgpu_kernel void @sub_var_var_i1(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) { %a = load volatile i1, ptr addrspace(1) %in0 %b = load volatile i1, ptr addrspace(1) %in1 @@ -15,8 +15,8 @@ define amdgpu_kernel void @sub_var_var_i1(ptr addrspace(1) %out, ptr addrspace(1 } ; GCN-LABEL: {{^}}sub_var_imm_i1: -; WAVE32: s_not_b32 -; WAVE64: s_not_b64 +; WAVE32: s_xor_b32 +; WAVE64: s_xor_b64 define amdgpu_kernel void @sub_var_imm_i1(ptr addrspace(1) %out, ptr addrspace(1) %in) 
{ %a = load volatile i1, ptr addrspace(1) %in %sub = sub i1 %a, 1 @@ -26,8 +26,8 @@ define amdgpu_kernel void @sub_var_imm_i1(ptr addrspace(1) %out, ptr addrspace(1 ; GCN-LABEL: {{^}}sub_i1_cf: ; GCN: ; %endif -; WAVE32: s_not_b32 -; WAVE64: s_not_b64 +; WAVE32: s_xor_b32 +; WAVE64: s_xor_b64 define amdgpu_kernel void @sub_i1_cf(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/add.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/add.ll index f156f8d6afce5..69db3790fc1b3 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/add.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/add.ll @@ -7,12 +7,12 @@ define i1 @add_i1(i1 %x, i1 %y) { ; LA32-LABEL: add_i1: ; LA32: # %bb.0: -; LA32-NEXT: add.w $a0, $a0, $a1 +; LA32-NEXT: xor $a0, $a0, $a1 ; LA32-NEXT: ret ; ; LA64-LABEL: add_i1: ; LA64: # %bb.0: -; LA64-NEXT: add.d $a0, $a0, $a1 +; LA64-NEXT: xor $a0, $a0, $a1 ; LA64-NEXT: ret %add = add i1 %x, %y ret i1 %add @@ -97,12 +97,12 @@ define i64 @add_i64(i64 %x, i64 %y) { define i1 @add_i1_3(i1 %x) { ; LA32-LABEL: add_i1_3: ; LA32: # %bb.0: -; LA32-NEXT: addi.w $a0, $a0, 1 +; LA32-NEXT: xori $a0, $a0, 1 ; LA32-NEXT: ret ; ; LA64-LABEL: add_i1_3: ; LA64: # %bb.0: -; LA64-NEXT: addi.d $a0, $a0, 1 +; LA64-NEXT: xori $a0, $a0, 1 ; LA64-NEXT: ret %add = add i1 %x, 3 ret i1 %add diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/mul.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/mul.ll index 58cc0e7d6484a..3a0cfd00940c5 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/mul.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/mul.ll @@ -7,12 +7,12 @@ define i1 @mul_i1(i1 %a, i1 %b) { ; LA32-LABEL: mul_i1: ; LA32: # %bb.0: # %entry -; LA32-NEXT: mul.w $a0, $a0, $a1 +; LA32-NEXT: and $a0, $a0, $a1 ; LA32-NEXT: ret ; ; LA64-LABEL: mul_i1: ; LA64: # %bb.0: # %entry -; LA64-NEXT: mul.d $a0, $a0, $a1 +; LA64-NEXT: and $a0, $a0, $a1 ; LA64-NEXT: ret entry: %r = mul i1 %a, %b diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/sub.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/sub.ll index 12543f857a198..ce4a199b57c3d 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/sub.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/sub.ll @@ -7,12 +7,12 @@ define i1 @sub_i1(i1 %x, i1 %y) { ; LA32-LABEL: sub_i1: ; LA32: # %bb.0: -; LA32-NEXT: sub.w $a0, $a0, $a1 +; LA32-NEXT: xor $a0, $a0, $a1 ; LA32-NEXT: ret ; ; LA64-LABEL: sub_i1: ; LA64: # %bb.0: -; LA64-NEXT: sub.d $a0, $a0, $a1 +; LA64-NEXT: xor $a0, $a0, $a1 ; LA64-NEXT: ret %sub = sub i1 %x, %y ret i1 %sub diff --git a/llvm/test/CodeGen/Mips/llvm-ir/add.ll b/llvm/test/CodeGen/Mips/llvm-ir/add.ll index f6b3b96aaa0ce..a21477acd5341 100644 --- a/llvm/test/CodeGen/Mips/llvm-ir/add.ll +++ b/llvm/test/CodeGen/Mips/llvm-ir/add.ll @@ -38,18 +38,11 @@ define signext i1 @add_i1(i1 signext %a, i1 signext %b) { entry: ; ALL-LABEL: add_i1: - ; NOT-R2-R6: addu $[[T0:[0-9]+]], $4, $5 - ; NOT-R2-R6: andi $[[T0]], $[[T0]], 1 - ; NOT-R2-R6: negu $2, $[[T0]] + ; NOT-R2-R6: xor $[[T0:[0-9]+]], $4, $5 - ; R2-R6: addu $[[T0:[0-9]+]], $4, $5 - ; R2-R6: andi $[[T0]], $[[T0]], 1 - ; R2-R6: negu $2, $[[T0]] + ; R2-R6: xor $[[T0:[0-9]+]], $4, $5 - ; MMR6: addu16 $[[T0:[0-9]+]], $4, $5 - ; MMR6: andi16 $[[T0]], $[[T0]], 1 - ; MMR6: li16 $[[T1:[0-9]+]], 0 - ; MMR6: subu16 $[[T0]], $[[T1]], $[[T0]] + ; MMR6: xor $[[T0:[0-9]+]], $4, $5 %r = add i1 %a, %b ret i1 %r @@ -368,18 +361,11 @@ define signext i128 @add_i128_4(i128 signext %a) { define 
signext i1 @add_i1_3(i1 signext %a) { ; ALL-LABEL: add_i1_3: - ; GP32: addiu $[[T0:[0-9]+]], $4, 1 - ; GP32: andi $[[T0]], $[[T0]], 1 - ; GP32: negu $2, $[[T0]] - - ; GP64: addiu $[[T0:[0-9]+]], $4, 1 - ; GP64: andi $[[T0]], $[[T0]], 1 - ; GP64: negu $2, $[[T0]] - - ; MMR6: addiur2 $[[T0:[0-9]+]], $4, 1 - ; MMR6: andi16 $[[T0]], $[[T0]], 1 - ; MMR6: li16 $[[T1:[0-9]+]], 0 - ; MMR6: subu16 $2, $[[T1]], $[[T0]] + ; GP32: not $[[T0:[0-9]+]], $4 + + ; GP64: not $[[T0:[0-9]+]], $4 + + ; MMR6: not16 $[[T0:[0-9]+]], $4 %r = add i1 3, %a ret i1 %r diff --git a/llvm/test/CodeGen/Mips/llvm-ir/mul.ll b/llvm/test/CodeGen/Mips/llvm-ir/mul.ll index 00b91d1413cfe..2735d53f5fe06 100644 --- a/llvm/test/CodeGen/Mips/llvm-ir/mul.ll +++ b/llvm/test/CodeGen/Mips/llvm-ir/mul.ll @@ -31,36 +31,22 @@ define signext i1 @mul_i1(i1 signext %a, i1 signext %b) { entry: ; ALL-LABEL: mul_i1: - ; M2: mult $4, $5 - ; M2: mflo $[[T0:[0-9]+]] - ; M2: andi $[[T0]], $[[T0]], 1 - ; M2: negu $2, $[[T0]] + ; M2: and $[[T0:[0-9]+]], $4, $5 - ; 32R1-R5: mul $[[T0:[0-9]+]], $4, $5 - ; 32R1-R5: andi $[[T0]], $[[T0]], 1 - ; 32R1-R5: negu $2, $[[T0]] + ; 32R1-R5: and $[[T0:[0-9]+]], $4, $5 - ; 32R6: mul $[[T0:[0-9]+]], $4, $5 - ; 32R6: andi $[[T0]], $[[T0]], 1 - ; 32R6: negu $2, $[[T0]] + ; 32R6: and $[[T0:[0-9]+]], $4, $5 - ; M4: mult $4, $5 - ; M4: mflo $[[T0:[0-9]+]] - ; M4: andi $[[T0]], $[[T0]], 1 - ; M4: negu $2, $[[T0]] + ; M4: and $[[T0:[0-9]+]], $4, $5 - ; 64R1-R5: mul $[[T0:[0-9]+]], $4, $5 - ; 64R1-R5: andi $[[T0]], $[[T0]], 1 - ; 64R1-R5: negu $2, $[[T0]] + ; 64R1-R5: and $[[T0:[0-9]+]], $4, $5 - ; 64R6: mul $[[T0:[0-9]+]], $4, $5 - ; 64R6: andi $[[T0]], $[[T0]], 1 - ; 64R6: negu $2, $[[T0]] + ; 64R6: and $[[T0:[0-9]+]], $4, $5 - ; MM32: mul $[[T0:[0-9]+]], $4, $5 - ; MM32: andi16 $[[T0]], $[[T0]], 1 - ; MM32: li16 $[[T1:[0-9]+]], 0 - ; MM32: subu16 $2, $[[T1]], $[[T0]] + ; MM32R3: and16 $4, $5 + ; MM32R3: move $2, $4 + + ; MM32R6: and $[[T0:[0-9]+]], $4, $5 %r = mul i1 %a, %b ret i1 %r diff --git a/llvm/test/CodeGen/Mips/llvm-ir/sub.ll b/llvm/test/CodeGen/Mips/llvm-ir/sub.ll index b465e24d47a05..dd5e6e957245d 100644 --- a/llvm/test/CodeGen/Mips/llvm-ir/sub.ll +++ b/llvm/test/CodeGen/Mips/llvm-ir/sub.ll @@ -33,14 +33,9 @@ define signext i1 @sub_i1(i1 signext %a, i1 signext %b) { entry: ; ALL-LABEL: sub_i1: - ; NOT-MM: subu $[[T0:[0-9]+]], $4, $5 - ; NOT-MM: andi $[[T0]], $[[T0]], 1 - ; NOT-MM: negu $2, $[[T0]] + ; NOT-MM: xor $[[T0:[0-9]+]], $4, $5 - ; MM: subu16 $[[T0:[0-9]+]], $4, $5 - ; MM: andi16 $[[T0]], $[[T0]], 1 - ; MM: li16 $[[T1:[0-9]+]], 0 - ; MM: subu16 $2, $[[T1]], $[[T0]] + ; MM: xor16 $[[T0:[0-9]+]], $4, $5 %r = sub i1 %a, %b ret i1 %r diff --git a/llvm/test/CodeGen/NVPTX/boolean-patterns.ll b/llvm/test/CodeGen/NVPTX/boolean-patterns.ll index 6ed9890610826..fd4d325ae9374 100644 --- a/llvm/test/CodeGen/NVPTX/boolean-patterns.ll +++ b/llvm/test/CodeGen/NVPTX/boolean-patterns.ll @@ -3,7 +3,7 @@ ; CHECK-LABEL: m2and_rr define i1 @m2and_rr(i1 %a, i1 %b) { -; CHECK: and.pred %p{{[0-9]+}}, %p{{[0-9]+}}, %p{{[0-9]+}} +; CHECK: and.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}} ; CHECK-NOT: mul %r = mul i1 %a, %b ret i1 %r diff --git a/llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll b/llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll index aa3c369efc61a..88c99a06326ab 100644 --- a/llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll +++ b/llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll @@ -7,19 +7,19 @@ define dso_local x86_regcallcc i1 @test_argReti1(i1 %a) { ; X32-LABEL: test_argReti1: ; X32: # %bb.0: -; X32-NEXT: incb %al +; 
X32-NEXT: xorb $1, %al ; X32-NEXT: # kill: def $al killed $al killed $eax ; X32-NEXT: retl ; ; WIN64-LABEL: test_argReti1: ; WIN64: # %bb.0: -; WIN64-NEXT: incb %al +; WIN64-NEXT: xorb $1, %al ; WIN64-NEXT: # kill: def $al killed $al killed $eax ; WIN64-NEXT: retq ; ; LINUXOSX64-LABEL: test_argReti1: ; LINUXOSX64: # %bb.0: -; LINUXOSX64-NEXT: incb %al +; LINUXOSX64-NEXT: xorb $1, %al ; LINUXOSX64-NEXT: # kill: def $al killed $al killed $eax ; LINUXOSX64-NEXT: retq %add = add i1 %a, 1 @@ -30,10 +30,10 @@ define dso_local x86_regcallcc i1 @test_argReti1(i1 %a) { define dso_local x86_regcallcc i1 @test_CallargReti1(i1 %a) { ; X32-LABEL: test_CallargReti1: ; X32: # %bb.0: -; X32-NEXT: incb %al +; X32-NEXT: xorb $1, %al ; X32-NEXT: movzbl %al, %eax ; X32-NEXT: calll _test_argReti1 -; X32-NEXT: incb %al +; X32-NEXT: xorb $1, %al ; X32-NEXT: retl ; ; WIN64-LABEL: test_CallargReti1: @@ -41,10 +41,10 @@ define dso_local x86_regcallcc i1 @test_CallargReti1(i1 %a) { ; WIN64-NEXT: pushq %rax ; WIN64-NEXT: .seh_stackalloc 8 ; WIN64-NEXT: .seh_endprologue -; WIN64-NEXT: incb %al +; WIN64-NEXT: xorb $1, %al ; WIN64-NEXT: movzbl %al, %eax ; WIN64-NEXT: callq test_argReti1 -; WIN64-NEXT: incb %al +; WIN64-NEXT: xorb $1, %al ; WIN64-NEXT: .seh_startepilogue ; WIN64-NEXT: popq %rcx ; WIN64-NEXT: .seh_endepilogue @@ -55,10 +55,10 @@ define dso_local x86_regcallcc i1 @test_CallargReti1(i1 %a) { ; LINUXOSX64: # %bb.0: ; LINUXOSX64-NEXT: pushq %rax ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16 -; LINUXOSX64-NEXT: incb %al +; LINUXOSX64-NEXT: xorb $1, %al ; LINUXOSX64-NEXT: movzbl %al, %eax ; LINUXOSX64-NEXT: callq test_argReti1 -; LINUXOSX64-NEXT: incb %al +; LINUXOSX64-NEXT: xorb $1, %al ; LINUXOSX64-NEXT: popq %rcx ; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8 ; LINUXOSX64-NEXT: retq diff --git a/llvm/test/CodeGen/X86/bitcast-vector-bool.ll b/llvm/test/CodeGen/X86/bitcast-vector-bool.ll index 501e73c46af9c..86d7df0c2d648 100644 --- a/llvm/test/CodeGen/X86/bitcast-vector-bool.ll +++ b/llvm/test/CodeGen/X86/bitcast-vector-bool.ll @@ -16,7 +16,7 @@ define i1 @bitcast_v2i64_to_v2i1(<2 x i64> %a0) nounwind { ; SSE-NEXT: movmskpd %xmm0, %ecx ; SSE-NEXT: movl %ecx, %eax ; SSE-NEXT: shrb %al -; SSE-NEXT: addb %cl, %al +; SSE-NEXT: xorb %cl, %al ; SSE-NEXT: retq ; ; AVX12-LABEL: bitcast_v2i64_to_v2i1: @@ -24,7 +24,7 @@ define i1 @bitcast_v2i64_to_v2i1(<2 x i64> %a0) nounwind { ; AVX12-NEXT: vmovmskpd %xmm0, %ecx ; AVX12-NEXT: movl %ecx, %eax ; AVX12-NEXT: shrb %al -; AVX12-NEXT: addb %cl, %al +; AVX12-NEXT: xorb %cl, %al ; AVX12-NEXT: retq ; ; AVX512-LABEL: bitcast_v2i64_to_v2i1: @@ -34,7 +34,7 @@ define i1 @bitcast_v2i64_to_v2i1(<2 x i64> %a0) nounwind { ; AVX512-NEXT: kshiftrw $1, %k0, %k1 ; AVX512-NEXT: kmovd %k1, %ecx ; AVX512-NEXT: kmovd %k0, %eax -; AVX512-NEXT: addb %cl, %al +; AVX512-NEXT: xorb %cl, %al ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq %1 = icmp slt <2 x i64> %a0, zeroinitializer diff --git a/llvm/test/CodeGen/X86/combine-add.ll b/llvm/test/CodeGen/X86/combine-add.ll index 01a0320bc6b2f..ff9f995c4765b 100644 --- a/llvm/test/CodeGen/X86/combine-add.ll +++ b/llvm/test/CodeGen/X86/combine-add.ll @@ -501,10 +501,8 @@ define i1 @PR51238(i1 %b, i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: PR51238: ; CHECK: # %bb.0: ; CHECK-NEXT: notb %cl -; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: addb %dl, %cl -; CHECK-NEXT: adcb $1, %al -; CHECK-NEXT: # kill: def $al killed $al killed $eax +; CHECK-NEXT: setae %al ; CHECK-NEXT: retq %ny = xor i8 %y, -1 %nz = xor i8 %z, -1 diff --git 
a/llvm/test/CodeGen/X86/fast-isel-select.ll b/llvm/test/CodeGen/X86/fast-isel-select.ll index 94477f3286e48..83e8d0a9d418b 100644 --- a/llvm/test/CodeGen/X86/fast-isel-select.ll +++ b/llvm/test/CodeGen/X86/fast-isel-select.ll @@ -11,7 +11,7 @@ define i32 @fastisel_select(i1 %exchSub2211_, i1 %trunc_8766) { ; CHECK: ## %bb.0: ; CHECK-NEXT: movb %sil, %al ; CHECK-NEXT: movb %dil, %dl -; CHECK-NEXT: subb %al, %dl +; CHECK-NEXT: xorb %al, %dl ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: movl $1204476887, %ecx ## imm = 0x47CADBD7 ; CHECK-NEXT: testb $1, %dl diff --git a/llvm/test/CodeGen/X86/gpr-to-mask.ll b/llvm/test/CodeGen/X86/gpr-to-mask.ll index c6e9606a7f7e3..d1513b584887f 100644 --- a/llvm/test/CodeGen/X86/gpr-to-mask.ll +++ b/llvm/test/CodeGen/X86/gpr-to-mask.ll @@ -106,7 +106,7 @@ define void @test_load_add(i1 %cond, ptr %fptr, ptr %iptr1, ptr %iptr2, float %f ; X86-64-NEXT: je .LBB2_2 ; X86-64-NEXT: # %bb.1: # %if ; X86-64-NEXT: movzbl (%rdx), %eax -; X86-64-NEXT: addb (%rcx), %al +; X86-64-NEXT: xorb (%rcx), %al ; X86-64-NEXT: jmp .LBB2_3 ; X86-64-NEXT: .LBB2_2: # %else ; X86-64-NEXT: movzbl (%rcx), %eax @@ -127,7 +127,7 @@ define void @test_load_add(i1 %cond, ptr %fptr, ptr %iptr1, ptr %iptr2, float %f ; X86-32-NEXT: # %bb.1: # %if ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-32-NEXT: movzbl (%edx), %edx -; X86-32-NEXT: addb (%ecx), %dl +; X86-32-NEXT: xorb (%ecx), %dl ; X86-32-NEXT: jmp .LBB2_3 ; X86-32-NEXT: .LBB2_2: # %else ; X86-32-NEXT: movzbl (%ecx), %edx diff --git a/llvm/test/CodeGen/X86/setcc-combine.ll b/llvm/test/CodeGen/X86/setcc-combine.ll index 0745881b2f3a3..e723569bda8a1 100644 --- a/llvm/test/CodeGen/X86/setcc-combine.ll +++ b/llvm/test/CodeGen/X86/setcc-combine.ll @@ -262,8 +262,8 @@ define void @test_i1_uge(ptr%A2) { define i64 @PR40657(i8 %var2, i8 %var9) { ; CHECK-LABEL: PR40657: ; CHECK: # %bb.0: -; CHECK-NEXT: addb %sil, %dil -; CHECK-NEXT: incb %dil +; CHECK-NEXT: xorl %esi, %edi +; CHECK-NEXT: notb %dil ; CHECK-NEXT: movzbl %dil, %eax ; CHECK-NEXT: andl $1, %eax ; CHECK-NEXT: retq @@ -283,12 +283,7 @@ define i64 @PR40657(i8 %var2, i8 %var9) { define i64 @PR40657_commute(i8 %var7, i8 %var8, i8 %var9) { ; CHECK-LABEL: PR40657_commute: ; CHECK: # %bb.0: -; CHECK-NEXT: subb %dil, %sil -; CHECK-NEXT: subb %sil, %dl -; CHECK-NEXT: subb %dl, %sil -; CHECK-NEXT: xorb %dl, %sil -; CHECK-NEXT: subb %sil, %dl -; CHECK-NEXT: movzbl %dl, %eax +; CHECK-NEXT: movl %edx, %eax ; CHECK-NEXT: andl $1, %eax ; CHECK-NEXT: retq %var4 = trunc i8 %var9 to i1 diff --git a/llvm/test/CodeGen/X86/sse-regcall.ll b/llvm/test/CodeGen/X86/sse-regcall.ll index 90b9b02f6a97a..03b9e123eea48 100644 --- a/llvm/test/CodeGen/X86/sse-regcall.ll +++ b/llvm/test/CodeGen/X86/sse-regcall.ll @@ -7,19 +7,19 @@ define x86_regcallcc i1 @test_argReti1(i1 %a) { ; WIN32-LABEL: test_argReti1: ; WIN32: # %bb.0: -; WIN32-NEXT: incb %al +; WIN32-NEXT: xorb $1, %al ; WIN32-NEXT: # kill: def $al killed $al killed $eax ; WIN32-NEXT: retl ; ; WIN64-LABEL: test_argReti1: ; WIN64: # %bb.0: -; WIN64-NEXT: incb %al +; WIN64-NEXT: xorb $1, %al ; WIN64-NEXT: # kill: def $al killed $al killed $eax ; WIN64-NEXT: retq ; ; LINUXOSX-LABEL: test_argReti1: ; LINUXOSX: # %bb.0: -; LINUXOSX-NEXT: incb %al +; LINUXOSX-NEXT: xorb $1, %al ; LINUXOSX-NEXT: # kill: def $al killed $al killed $eax ; LINUXOSX-NEXT: retq %add = add i1 %a, 1 @@ -30,10 +30,10 @@ define x86_regcallcc i1 @test_argReti1(i1 %a) { define x86_regcallcc i1 @test_CallargReti1(i1 %a) { ; WIN32-LABEL: test_CallargReti1: ; WIN32: # %bb.0: -; WIN32-NEXT: incb %al +; 
WIN32-NEXT: xorb $1, %al ; WIN32-NEXT: movzbl %al, %eax ; WIN32-NEXT: calll _test_argReti1 -; WIN32-NEXT: incb %al +; WIN32-NEXT: xorb $1, %al ; WIN32-NEXT: retl ; ; WIN64-LABEL: test_CallargReti1: @@ -41,10 +41,10 @@ define x86_regcallcc i1 @test_CallargReti1(i1 %a) { ; WIN64-NEXT: pushq %rax ; WIN64-NEXT: .seh_stackalloc 8 ; WIN64-NEXT: .seh_endprologue -; WIN64-NEXT: incb %al +; WIN64-NEXT: xorb $1, %al ; WIN64-NEXT: movzbl %al, %eax ; WIN64-NEXT: callq test_argReti1 -; WIN64-NEXT: incb %al +; WIN64-NEXT: xorb $1, %al ; WIN64-NEXT: .seh_startepilogue ; WIN64-NEXT: popq %rcx ; WIN64-NEXT: .seh_endepilogue @@ -55,10 +55,10 @@ define x86_regcallcc i1 @test_CallargReti1(i1 %a) { ; LINUXOSX: # %bb.0: ; LINUXOSX-NEXT: pushq %rax ; LINUXOSX-NEXT: .cfi_def_cfa_offset 16 -; LINUXOSX-NEXT: incb %al +; LINUXOSX-NEXT: xorb $1, %al ; LINUXOSX-NEXT: movzbl %al, %eax ; LINUXOSX-NEXT: callq *test_argReti1@GOTPCREL(%rip) -; LINUXOSX-NEXT: incb %al +; LINUXOSX-NEXT: xorb $1, %al ; LINUXOSX-NEXT: popq %rcx ; LINUXOSX-NEXT: .cfi_def_cfa_offset 8 ; LINUXOSX-NEXT: retq diff --git a/llvm/test/CodeGen/X86/sse-regcall4.ll b/llvm/test/CodeGen/X86/sse-regcall4.ll index 4be1f39a7d764..6f964f0a88ea3 100644 --- a/llvm/test/CodeGen/X86/sse-regcall4.ll +++ b/llvm/test/CodeGen/X86/sse-regcall4.ll @@ -7,19 +7,19 @@ define x86_regcallcc i1 @test_argReti1(i1 %a) { ; WIN32-LABEL: test_argReti1: ; WIN32: # %bb.0: -; WIN32-NEXT: incb %cl +; WIN32-NEXT: xorb $1, %cl ; WIN32-NEXT: # kill: def $cl killed $cl killed $ecx ; WIN32-NEXT: retl ; ; WIN64-LABEL: test_argReti1: ; WIN64: # %bb.0: -; WIN64-NEXT: incb %al +; WIN64-NEXT: xorb $1, %al ; WIN64-NEXT: # kill: def $al killed $al killed $eax ; WIN64-NEXT: retq ; ; LINUXOSX-LABEL: test_argReti1: ; LINUXOSX: # %bb.0: -; LINUXOSX-NEXT: incb %al +; LINUXOSX-NEXT: xorb $1, %al ; LINUXOSX-NEXT: # kill: def $al killed $al killed $eax ; LINUXOSX-NEXT: retq %add = add i1 %a, 1 @@ -30,10 +30,10 @@ define x86_regcallcc i1 @test_argReti1(i1 %a) { define x86_regcallcc i1 @test_CallargReti1(i1 %a) { ; WIN32-LABEL: test_CallargReti1: ; WIN32: # %bb.0: -; WIN32-NEXT: incb %cl +; WIN32-NEXT: xorb $1, %cl ; WIN32-NEXT: movzbl %cl, %ecx ; WIN32-NEXT: calll _test_argReti1 -; WIN32-NEXT: incb %cl +; WIN32-NEXT: xorb $1, %cl ; WIN32-NEXT: retl ; ; WIN64-LABEL: test_CallargReti1: @@ -41,10 +41,10 @@ define x86_regcallcc i1 @test_CallargReti1(i1 %a) { ; WIN64-NEXT: pushq %rax ; WIN64-NEXT: .seh_stackalloc 8 ; WIN64-NEXT: .seh_endprologue -; WIN64-NEXT: incb %al +; WIN64-NEXT: xorb $1, %al ; WIN64-NEXT: movzbl %al, %eax ; WIN64-NEXT: callq test_argReti1 -; WIN64-NEXT: incb %al +; WIN64-NEXT: xorb $1, %al ; WIN64-NEXT: .seh_startepilogue ; WIN64-NEXT: popq %rcx ; WIN64-NEXT: .seh_endepilogue @@ -55,10 +55,10 @@ define x86_regcallcc i1 @test_CallargReti1(i1 %a) { ; LINUXOSX: # %bb.0: ; LINUXOSX-NEXT: pushq %rax ; LINUXOSX-NEXT: .cfi_def_cfa_offset 16 -; LINUXOSX-NEXT: incb %al +; LINUXOSX-NEXT: xorb $1, %al ; LINUXOSX-NEXT: movzbl %al, %eax ; LINUXOSX-NEXT: callq *test_argReti1@GOTPCREL(%rip) -; LINUXOSX-NEXT: incb %al +; LINUXOSX-NEXT: xorb $1, %al ; LINUXOSX-NEXT: popq %rcx ; LINUXOSX-NEXT: .cfi_def_cfa_offset 8 ; LINUXOSX-NEXT: retq diff --git a/llvm/test/CodeGen/X86/subcarry.ll b/llvm/test/CodeGen/X86/subcarry.ll index 9538ea1061cd1..7d5db07c0172a 100644 --- a/llvm/test/CodeGen/X86/subcarry.ll +++ b/llvm/test/CodeGen/X86/subcarry.ll @@ -310,15 +310,10 @@ define { i64, i64, i1 } @subcarry_2x64_add_reversed(i64 %x0, i64 %x1, i64 %y0, i ; CHECK-LABEL: subcarry_2x64_add_reversed: ; CHECK: # %bb.0: ; 
CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: movq %rsi, %rdi -; CHECK-NEXT: subq %rcx, %rdi ; CHECK-NEXT: subq %rdx, %rax -; CHECK-NEXT: sbbq $0, %rdi -; CHECK-NEXT: setb %r8b -; CHECK-NEXT: cmpq %rcx, %rsi -; CHECK-NEXT: adcb $0, %r8b -; CHECK-NEXT: movq %rdi, %rdx -; CHECK-NEXT: movl %r8d, %ecx +; CHECK-NEXT: sbbq %rcx, %rsi +; CHECK-NEXT: setb %cl +; CHECK-NEXT: movq %rsi, %rdx ; CHECK-NEXT: retq %t0 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %x0, i64 %y0) %s0 = extractvalue { i64, i1 } %t0, 0
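---

Illustrative example (not part of the patch): the folds this change enables can be seen on plain scalar IR. In 1-bit arithmetic, add and sub are addition mod 2 (xor), mul is conjunction (and), and the unsigned add-with-overflow splits into an xor for the sum and an and for the carry-out, so getNode can emit bitwise nodes directly. This is a minimal sketch; the function names below are hypothetical and chosen only for demonstration:

define i1 @demo_add_i1(i1 %x, i1 %y) {
  ; 1-bit wrap-around add: 1 + 1 = 0, identical to xor
  %r = add i1 %x, %y
  ret i1 %r
}

define i1 @demo_mul_i1(i1 %x, i1 %y) {
  ; 1-bit multiply: nonzero only when both bits are set, identical to and
  %r = mul i1 %x, %y
  ret i1 %r
}

declare { i1, i1 } @demo.llvm.uadd.with.overflow.i1(i1, i1)

define { i1, i1 } @demo_uaddo_i1(i1 %x, i1 %y) {
  ; 1-bit unsigned add-with-overflow: the sum is xor(x, y), the carry-out is and(x, y),
  ; matching the {xor(x,y),and(x,y)} fold added in the SelectionDAG hunk above
  %p = call { i1, i1 } @llvm.uadd.with.overflow.i1(i1 %x, i1 %y)
  ret { i1, i1 } %p
}

Running llc on IR like this with the patch applied should select xor/and instructions rather than add/mul sequences, which is exactly what the updated CHECK lines in the test diffs above reflect.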