diff --git a/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/llvm/lib/CodeGen/MachineTraceMetrics.cpp
index bf3add010574b..dd26d3e008745 100644
--- a/llvm/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/llvm/lib/CodeGen/MachineTraceMetrics.cpp
@@ -20,6 +20,7 @@
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/TargetSchedule.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -761,6 +762,59 @@ static void updatePhysDepsDownwards(const MachineInstr *UseMI,
   }
 }
 
+/// Estimates the number of cycles elapsed between DefMI and UseMI, DefMI
+/// inclusive and UseMI exclusive, if they're in the same MBB. Returns
+/// std::nullopt if they're in different MBBs, and 0 if UseMI is null.
+static std::optional<unsigned>
+estimateDefUseCycles(const TargetSchedModel &Sched, const MachineInstr *DefMI,
+                     const MachineInstr *UseMI) {
+  if (!UseMI)
+    return 0;
+  if (DefMI->getParent() != UseMI->getParent())
+    return std::nullopt;
+
+  const auto DefIt = DefMI->getIterator();
+  const auto UseIt = UseMI->getIterator();
+
+  unsigned NumMicroOps = 0;
+  for (auto It = DefIt; It != UseIt; ++It) {
+    // In cases where the UseMI is a PHI at the beginning of the MBB, compute
+    // MicroOps until the end of the MBB.
+    if (It.isEnd())
+      break;
+
+    NumMicroOps += Sched.getNumMicroOps(&*It);
+  }
+  return NumMicroOps / Sched.getIssueWidth();
+}
+
+/// Wraps Sched.computeOperandLatency, accounting for the case when
+/// InstrSchedModel and InstrItineraries are not available: in this case,
+/// Sched.computeOperandLatency returns DefaultDefLatency, which is a very
+/// rough approximation; to improve this approximation, offset it by the
+/// approximate cycles elapsed from DefMI to UseMI (since the MIs could be
+/// re-ordered by the scheduler, and we don't have this information, this
+/// cannot be known exactly). When scheduling information is available,
+/// Sched.computeOperandLatency returns a much better estimate (especially if
+/// UseMI is non-null), so we just return that.
+static unsigned computeOperandLatency(const TargetSchedModel &Sched,
+                                      const MachineInstr *DefMI,
+                                      unsigned DefOperIdx,
+                                      const MachineInstr *UseMI,
+                                      unsigned UseOperIdx) {
+  assert(DefMI && "Non-null DefMI expected");
+  if (!Sched.hasInstrSchedModel() && !Sched.hasInstrItineraries()) {
+    unsigned DefaultDefLatency = Sched.getInstrInfo()->defaultDefLatency(
+        *Sched.getMCSchedModel(), *DefMI);
+    std::optional<unsigned> DefUseCycles =
+        estimateDefUseCycles(Sched, DefMI, UseMI);
+    if (!DefUseCycles || DefaultDefLatency <= DefUseCycles)
+      return 0;
+    return DefaultDefLatency - *DefUseCycles;
+  }
+  return Sched.computeOperandLatency(DefMI, DefOperIdx, UseMI, UseOperIdx);
+}
+
 /// The length of the critical path through a trace is the maximum of two path
 /// lengths:
 ///
@@ -813,8 +867,8 @@ updateDepth(MachineTraceMetrics::TraceBlockInfo &TBI, const MachineInstr &UseMI,
     unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth;
     // Add latency if DefMI is a real instruction. Transients get latency 0.
     if (!Dep.DefMI->isTransient())
-      DepCycle += MTM.SchedModel
-        .computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI, Dep.UseOp);
+      DepCycle += computeOperandLatency(MTM.SchedModel, Dep.DefMI, Dep.DefOp,
+                                        &UseMI, Dep.UseOp);
     Cycle = std::max(Cycle, DepCycle);
   }
   // Remember the instruction depth.
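To see what the new fallback path computes in isolation, here is a minimal stand-alone C++ sketch of the same arithmetic. It is not part of the patch; the struct and function names are hypothetical, introduced only for illustration, and the two inputs stand in for the values the patch derives from the TargetSchedModel.

#include <optional>

// Hypothetical stand-ins for the two quantities the patch derives from the
// scheduling model; these names are illustrative, not LLVM APIs.
struct FallbackLatencyInputs {
  unsigned DefaultDefLatency;           // cf. TII->defaultDefLatency(...)
  std::optional<unsigned> DefUseCycles; // cf. estimateDefUseCycles(...)
};

// Mirrors the fallback branch of the new computeOperandLatency wrapper:
// offset the rough default latency by the cycles estimated to have already
// elapsed between DefMI and UseMI, clamping the result at zero.
unsigned fallbackLatency(const FallbackLatencyInputs &In) {
  if (!In.DefUseCycles || In.DefaultDefLatency <= *In.DefUseCycles)
    return 0;
  return In.DefaultDefLatency - *In.DefUseCycles;
}

The clamp to zero reflects the assumption that once the estimated elapsed micro-op cycles reach the default def latency, the defined value should already be available, so no additional latency is charged to the dependence.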
@@ -929,8 +983,8 @@ static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height, if (!MI.isTransient()) { // We may not know the UseMI of this dependency, if it came from the // live-in list. SchedModel can handle a NULL UseMI. - DepHeight += SchedModel.computeOperandLatency(&MI, MO.getOperandNo(), - I->MI, I->Op); + DepHeight += computeOperandLatency(SchedModel, &MI, MO.getOperandNo(), + I->MI, I->Op); } Height = std::max(Height, DepHeight); // This regunit is dead above MI. @@ -963,10 +1017,9 @@ static bool pushDepHeight(const DataDep &Dep, const MachineInstr &UseMI, unsigned UseHeight, MIHeightMap &Heights, const TargetSchedModel &SchedModel, const TargetInstrInfo *TII) { - // Adjust height by Dep.DefMI latency. if (!Dep.DefMI->isTransient()) - UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI, - Dep.UseOp); + UseHeight += computeOperandLatency(SchedModel, Dep.DefMI, Dep.DefOp, &UseMI, + Dep.UseOp); // Update Heights[DefMI] to be the maximum height seen. MIHeightMap::iterator I; @@ -1192,8 +1245,8 @@ MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr &PHI) const { unsigned DepCycle = getInstrCycles(*Dep.DefMI).Depth; // Add latency if DefMI is a real instruction. Transients get latency 0. if (!Dep.DefMI->isTransient()) - DepCycle += TE.MTM.SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp, - &PHI, Dep.UseOp); + DepCycle += computeOperandLatency(TE.MTM.SchedModel, Dep.DefMI, Dep.DefOp, + &PHI, Dep.UseOp); return DepCycle; } diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll b/llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll index 5c42fefb95b39..69261126cd8b0 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll @@ -94,15 +94,15 @@ define i7 @bitreverse_i7(i7 %x) { ; RV32-NEXT: or a1, a1, a2 ; RV32-NEXT: slli a2, a0, 2 ; RV32-NEXT: andi a2, a2, 16 +; RV32-NEXT: or a1, a1, a2 ; RV32-NEXT: andi a0, a0, 127 -; RV32-NEXT: andi a3, a0, 8 -; RV32-NEXT: or a2, a2, a3 +; RV32-NEXT: andi a2, a0, 8 ; RV32-NEXT: or a1, a1, a2 ; RV32-NEXT: srli a2, a0, 2 ; RV32-NEXT: andi a2, a2, 4 -; RV32-NEXT: srli a3, a0, 4 -; RV32-NEXT: andi a3, a3, 2 -; RV32-NEXT: or a2, a2, a3 +; RV32-NEXT: or a1, a1, a2 +; RV32-NEXT: srli a2, a0, 4 +; RV32-NEXT: andi a2, a2, 2 ; RV32-NEXT: or a1, a1, a2 ; RV32-NEXT: srli a0, a0, 6 ; RV32-NEXT: or a0, a1, a0 @@ -117,15 +117,15 @@ define i7 @bitreverse_i7(i7 %x) { ; RV64-NEXT: or a1, a1, a2 ; RV64-NEXT: slli a2, a0, 2 ; RV64-NEXT: andi a2, a2, 16 +; RV64-NEXT: or a1, a1, a2 ; RV64-NEXT: andi a0, a0, 127 -; RV64-NEXT: andi a3, a0, 8 -; RV64-NEXT: or a2, a2, a3 +; RV64-NEXT: andi a2, a0, 8 ; RV64-NEXT: or a1, a1, a2 ; RV64-NEXT: srliw a2, a0, 2 ; RV64-NEXT: andi a2, a2, 4 -; RV64-NEXT: srliw a3, a0, 4 -; RV64-NEXT: andi a3, a3, 2 -; RV64-NEXT: or a2, a2, a3 +; RV64-NEXT: or a1, a1, a2 +; RV64-NEXT: srliw a2, a0, 4 +; RV64-NEXT: andi a2, a2, 2 ; RV64-NEXT: or a1, a1, a2 ; RV64-NEXT: srliw a0, a0, 6 ; RV64-NEXT: or a0, a1, a0 @@ -145,24 +145,24 @@ define i24 @bitreverse_i24(i24 %x) { ; RV32-NEXT: or a0, a0, a1 ; RV32-NEXT: lui a1, 1048335 ; RV32-NEXT: addi a1, a1, 240 -; RV32-NEXT: and a3, a1, a2 -; RV32-NEXT: and a3, a0, a3 +; RV32-NEXT: and a3, a0, a1 +; RV32-NEXT: and a3, a3, a2 ; RV32-NEXT: srli a3, a3, 4 ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: and a0, a0, a1 ; RV32-NEXT: or a0, a3, a0 ; RV32-NEXT: lui a1, 1047757 ; RV32-NEXT: addi a1, a1, -820 -; RV32-NEXT: and a3, a1, a2 -; RV32-NEXT: and a3, a0, a3 +; RV32-NEXT: and a3, a0, a1 +; RV32-NEXT: and a3, a3, a2 
; RV32-NEXT: srli a3, a3, 2 ; RV32-NEXT: slli a0, a0, 2 ; RV32-NEXT: and a0, a0, a1 ; RV32-NEXT: or a0, a3, a0 ; RV32-NEXT: lui a1, 1047211 ; RV32-NEXT: addi a1, a1, -1366 -; RV32-NEXT: and a2, a1, a2 -; RV32-NEXT: and a2, a0, a2 +; RV32-NEXT: and a3, a0, a1 +; RV32-NEXT: and a2, a3, a2 ; RV32-NEXT: srli a2, a2, 1 ; RV32-NEXT: slli a0, a0, 1 ; RV32-NEXT: and a0, a0, a1 @@ -179,24 +179,24 @@ define i24 @bitreverse_i24(i24 %x) { ; RV64-NEXT: or a0, a0, a1 ; RV64-NEXT: lui a1, 1048335 ; RV64-NEXT: addi a1, a1, 240 -; RV64-NEXT: and a3, a1, a2 -; RV64-NEXT: and a3, a0, a3 +; RV64-NEXT: and a3, a0, a1 +; RV64-NEXT: and a3, a3, a2 ; RV64-NEXT: srliw a3, a3, 4 ; RV64-NEXT: slli a0, a0, 4 ; RV64-NEXT: and a0, a0, a1 ; RV64-NEXT: or a0, a3, a0 ; RV64-NEXT: lui a1, 1047757 ; RV64-NEXT: addi a1, a1, -820 -; RV64-NEXT: and a3, a1, a2 -; RV64-NEXT: and a3, a0, a3 +; RV64-NEXT: and a3, a0, a1 +; RV64-NEXT: and a3, a3, a2 ; RV64-NEXT: srliw a3, a3, 2 ; RV64-NEXT: slli a0, a0, 2 ; RV64-NEXT: and a0, a0, a1 ; RV64-NEXT: or a0, a3, a0 ; RV64-NEXT: lui a1, 1047211 ; RV64-NEXT: addiw a1, a1, -1366 -; RV64-NEXT: and a2, a1, a2 -; RV64-NEXT: and a2, a0, a2 +; RV64-NEXT: and a3, a0, a1 +; RV64-NEXT: and a2, a3, a2 ; RV64-NEXT: srliw a2, a2, 1 ; RV64-NEXT: slliw a0, a0, 1 ; RV64-NEXT: and a0, a0, a1 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/vararg.ll b/llvm/test/CodeGen/RISCV/GlobalISel/vararg.ll index 3df4aca40ec94..3480d5bb3503a 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/vararg.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/vararg.ll @@ -1252,8 +1252,8 @@ define iXLen @va4_va_copy(i32 %argno, ...) nounwind { ; RV32-NEXT: sw a3, 16(sp) ; RV32-NEXT: lw a2, 0(a2) ; RV32-NEXT: add a0, a0, s1 -; RV32-NEXT: add a1, a1, a2 ; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, a0, a2 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -1308,8 +1308,8 @@ define iXLen @va4_va_copy(i32 %argno, ...) nounwind { ; RV64-NEXT: sd a3, 16(sp) ; RV64-NEXT: ld a2, 0(a2) ; RV64-NEXT: add a0, a0, s1 -; RV64-NEXT: add a1, a1, a2 ; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, a0, a2 ; RV64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -1363,8 +1363,8 @@ define iXLen @va4_va_copy(i32 %argno, ...) nounwind { ; RV32-WITHFP-NEXT: sw a3, -20(s0) ; RV32-WITHFP-NEXT: lw a2, 0(a2) ; RV32-WITHFP-NEXT: add a0, a0, s2 -; RV32-WITHFP-NEXT: add a1, a1, a2 ; RV32-WITHFP-NEXT: add a0, a0, a1 +; RV32-WITHFP-NEXT: add a0, a0, a2 ; RV32-WITHFP-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-WITHFP-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32-WITHFP-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -1422,8 +1422,8 @@ define iXLen @va4_va_copy(i32 %argno, ...) 
nounwind { ; RV64-WITHFP-NEXT: sd a3, -40(s0) ; RV64-WITHFP-NEXT: ld a2, 0(a2) ; RV64-WITHFP-NEXT: add a0, a0, s2 -; RV64-WITHFP-NEXT: add a1, a1, a2 ; RV64-WITHFP-NEXT: add a0, a0, a1 +; RV64-WITHFP-NEXT: add a0, a0, a2 ; RV64-WITHFP-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64-WITHFP-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64-WITHFP-NEXT: ld s1, 24(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/abds-neg.ll b/llvm/test/CodeGen/RISCV/abds-neg.ll index b6064198935a6..75aef089cc247 100644 --- a/llvm/test/CodeGen/RISCV/abds-neg.ll +++ b/llvm/test/CodeGen/RISCV/abds-neg.ll @@ -697,8 +697,8 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: snez a3, a3 ; RV32I-NEXT: neg a4, a6 ; RV32I-NEXT: sltu a5, a4, a3 -; RV32I-NEXT: neg a6, a7 -; RV32I-NEXT: sub a5, a6, a5 +; RV32I-NEXT: add a5, a7, a5 +; RV32I-NEXT: neg a5, a5 ; RV32I-NEXT: snez a6, a1 ; RV32I-NEXT: add a2, a2, a6 ; RV32I-NEXT: neg a2, a2 @@ -816,8 +816,8 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: snez a3, a3 ; RV32ZBB-NEXT: neg a4, a6 ; RV32ZBB-NEXT: sltu a5, a4, a3 -; RV32ZBB-NEXT: neg a6, a7 -; RV32ZBB-NEXT: sub a5, a6, a5 +; RV32ZBB-NEXT: add a5, a7, a5 +; RV32ZBB-NEXT: neg a5, a5 ; RV32ZBB-NEXT: snez a6, a1 ; RV32ZBB-NEXT: add a2, a2, a6 ; RV32ZBB-NEXT: neg a2, a2 @@ -944,8 +944,8 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: snez a3, a3 ; RV32I-NEXT: neg a4, a6 ; RV32I-NEXT: sltu a5, a4, a3 -; RV32I-NEXT: neg a6, a7 -; RV32I-NEXT: sub a5, a6, a5 +; RV32I-NEXT: add a5, a7, a5 +; RV32I-NEXT: neg a5, a5 ; RV32I-NEXT: snez a6, a1 ; RV32I-NEXT: add a2, a2, a6 ; RV32I-NEXT: neg a2, a2 @@ -1063,8 +1063,8 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: snez a3, a3 ; RV32ZBB-NEXT: neg a4, a6 ; RV32ZBB-NEXT: sltu a5, a4, a3 -; RV32ZBB-NEXT: neg a6, a7 -; RV32ZBB-NEXT: sub a5, a6, a5 +; RV32ZBB-NEXT: add a5, a7, a5 +; RV32ZBB-NEXT: neg a5, a5 ; RV32ZBB-NEXT: snez a6, a1 ; RV32ZBB-NEXT: add a2, a2, a6 ; RV32ZBB-NEXT: neg a2, a2 diff --git a/llvm/test/CodeGen/RISCV/abds.ll b/llvm/test/CodeGen/RISCV/abds.ll index 91b044902a520..a3205acb46c6c 100644 --- a/llvm/test/CodeGen/RISCV/abds.ll +++ b/llvm/test/CodeGen/RISCV/abds.ll @@ -2076,8 +2076,8 @@ define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: sltu t0, a7, a5 ; RV32I-NEXT: snez a2, a2 ; RV32I-NEXT: add a1, a1, a2 +; RV32I-NEXT: add a1, a1, t0 ; RV32I-NEXT: neg a1, a1 -; RV32I-NEXT: sub a1, a1, t0 ; RV32I-NEXT: sub a2, a7, a5 ; RV32I-NEXT: neg a3, a3 ; RV32I-NEXT: add a4, a4, a6 @@ -2139,8 +2139,8 @@ define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: sltu t0, a7, a5 ; RV32ZBB-NEXT: snez a2, a2 ; RV32ZBB-NEXT: add a1, a1, a2 +; RV32ZBB-NEXT: add a1, a1, t0 ; RV32ZBB-NEXT: neg a1, a1 -; RV32ZBB-NEXT: sub a1, a1, t0 ; RV32ZBB-NEXT: sub a2, a7, a5 ; RV32ZBB-NEXT: neg a3, a3 ; RV32ZBB-NEXT: add a4, a4, a6 @@ -2207,8 +2207,8 @@ define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: sltu t0, a7, a5 ; RV32I-NEXT: snez a2, a2 ; RV32I-NEXT: add a1, a1, a2 +; RV32I-NEXT: add a1, a1, t0 ; RV32I-NEXT: neg a1, a1 -; RV32I-NEXT: sub a1, a1, t0 ; RV32I-NEXT: sub a2, a7, a5 ; RV32I-NEXT: neg a3, a3 ; RV32I-NEXT: add a4, a4, a6 @@ -2270,8 +2270,8 @@ define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: sltu t0, a7, a5 ; RV32ZBB-NEXT: snez a2, a2 ; RV32ZBB-NEXT: add a1, a1, a2 +; RV32ZBB-NEXT: add a1, a1, t0 ; RV32ZBB-NEXT: neg a1, a1 -; RV32ZBB-NEXT: sub a1, a1, t0 ; RV32ZBB-NEXT: 
sub a2, a7, a5 ; RV32ZBB-NEXT: neg a3, a3 ; RV32ZBB-NEXT: add a4, a4, a6 diff --git a/llvm/test/CodeGen/RISCV/abdu-neg.ll b/llvm/test/CodeGen/RISCV/abdu-neg.ll index 54075f4169439..d09b1959750d8 100644 --- a/llvm/test/CodeGen/RISCV/abdu-neg.ll +++ b/llvm/test/CodeGen/RISCV/abdu-neg.ll @@ -696,8 +696,8 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: sub a1, a1, a2 ; RV32I-NEXT: snez a2, t3 ; RV32I-NEXT: add a1, a1, a2 +; RV32I-NEXT: add a1, a1, t5 ; RV32I-NEXT: neg a1, a1 -; RV32I-NEXT: sub a1, a1, t5 ; RV32I-NEXT: sub a2, t4, t1 ; RV32I-NEXT: add a3, a3, a7 ; RV32I-NEXT: neg a3, a3 @@ -808,8 +808,8 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: sub a1, a1, a2 ; RV32ZBB-NEXT: snez a2, t3 ; RV32ZBB-NEXT: add a1, a1, a2 +; RV32ZBB-NEXT: add a1, a1, t5 ; RV32ZBB-NEXT: neg a1, a1 -; RV32ZBB-NEXT: sub a1, a1, t5 ; RV32ZBB-NEXT: sub a2, t4, t1 ; RV32ZBB-NEXT: add a3, a3, a7 ; RV32ZBB-NEXT: neg a3, a3 @@ -929,8 +929,8 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: sub a1, a1, a2 ; RV32I-NEXT: snez a2, t3 ; RV32I-NEXT: add a1, a1, a2 +; RV32I-NEXT: add a1, a1, t5 ; RV32I-NEXT: neg a1, a1 -; RV32I-NEXT: sub a1, a1, t5 ; RV32I-NEXT: sub a2, t4, t1 ; RV32I-NEXT: add a3, a3, a7 ; RV32I-NEXT: neg a3, a3 @@ -1041,8 +1041,8 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: sub a1, a1, a2 ; RV32ZBB-NEXT: snez a2, t3 ; RV32ZBB-NEXT: add a1, a1, a2 +; RV32ZBB-NEXT: add a1, a1, t5 ; RV32ZBB-NEXT: neg a1, a1 -; RV32ZBB-NEXT: sub a1, a1, t5 ; RV32ZBB-NEXT: sub a2, t4, t1 ; RV32ZBB-NEXT: add a3, a3, a7 ; RV32ZBB-NEXT: neg a3, a3 diff --git a/llvm/test/CodeGen/RISCV/addcarry.ll b/llvm/test/CodeGen/RISCV/addcarry.ll index 3a4163a8bb50f..053b98755417b 100644 --- a/llvm/test/CodeGen/RISCV/addcarry.ll +++ b/llvm/test/CodeGen/RISCV/addcarry.ll @@ -18,9 +18,9 @@ define i64 @addcarry(i64 %x, i64 %y) nounwind { ; RISCV32-NEXT: sltu a7, a4, a6 ; RISCV32-NEXT: sltu a5, a6, a5 ; RISCV32-NEXT: mulhu a6, a0, a3 -; RISCV32-NEXT: mulhu t0, a1, a2 -; RISCV32-NEXT: add a6, a6, t0 ; RISCV32-NEXT: add a5, a6, a5 +; RISCV32-NEXT: mulhu a6, a1, a2 +; RISCV32-NEXT: add a5, a5, a6 ; RISCV32-NEXT: add a5, a5, a7 ; RISCV32-NEXT: mul a6, a1, a3 ; RISCV32-NEXT: add a5, a5, a6 diff --git a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll index 927e778c9dd9c..dc8ff7bb7c5c0 100644 --- a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll +++ b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll @@ -227,8 +227,8 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { ; RV32IA-NEXT: addi a5, a5, 1 ; RV32IA-NEXT: sltu a7, a7, a1 ; RV32IA-NEXT: neg a7, a7 -; RV32IA-NEXT: and a5, a5, a3 ; RV32IA-NEXT: and a5, a7, a5 +; RV32IA-NEXT: and a5, a5, a3 ; RV32IA-NEXT: sll a5, a5, a0 ; RV32IA-NEXT: and a7, a6, a4 ; RV32IA-NEXT: or a7, a7, a5 @@ -307,8 +307,8 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { ; RV64IA-NEXT: addi a6, a6, 1 ; RV64IA-NEXT: sltu t0, t0, a1 ; RV64IA-NEXT: negw t0, t0 -; RV64IA-NEXT: and a6, a6, a3 ; RV64IA-NEXT: and a6, t0, a6 +; RV64IA-NEXT: and a6, a6, a3 ; RV64IA-NEXT: sllw a6, a6, a0 ; RV64IA-NEXT: and a4, a4, a5 ; RV64IA-NEXT: or a6, a4, a6 diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-common.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-common.ll index 278187f62cd75..8bcdb059a95fb 100644 --- a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-common.ll +++ 
b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-common.ll @@ -94,15 +94,15 @@ define i32 @callee_aligned_stack(i32 %a, i32 %b, fp128 %c, i32 %d, i32 %e, i64 % ; RV32I-FPELIM-LABEL: callee_aligned_stack: ; RV32I-FPELIM: # %bb.0: ; RV32I-FPELIM-NEXT: lw a0, 0(a2) -; RV32I-FPELIM-NEXT: lw a1, 8(sp) +; RV32I-FPELIM-NEXT: lw a1, 20(sp) ; RV32I-FPELIM-NEXT: lw a2, 0(sp) -; RV32I-FPELIM-NEXT: lw a3, 20(sp) +; RV32I-FPELIM-NEXT: lw a3, 8(sp) ; RV32I-FPELIM-NEXT: lw a4, 16(sp) ; RV32I-FPELIM-NEXT: add a0, a0, a7 -; RV32I-FPELIM-NEXT: add a1, a2, a1 -; RV32I-FPELIM-NEXT: add a0, a0, a1 -; RV32I-FPELIM-NEXT: add a3, a4, a3 +; RV32I-FPELIM-NEXT: add a0, a0, a2 ; RV32I-FPELIM-NEXT: add a0, a0, a3 +; RV32I-FPELIM-NEXT: add a0, a0, a4 +; RV32I-FPELIM-NEXT: add a0, a0, a1 ; RV32I-FPELIM-NEXT: ret ; ; RV32I-WITHFP-LABEL: callee_aligned_stack: @@ -112,15 +112,15 @@ define i32 @callee_aligned_stack(i32 %a, i32 %b, fp128 %c, i32 %d, i32 %e, i64 % ; RV32I-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-WITHFP-NEXT: addi s0, sp, 16 ; RV32I-WITHFP-NEXT: lw a0, 0(a2) -; RV32I-WITHFP-NEXT: lw a1, 8(s0) +; RV32I-WITHFP-NEXT: lw a1, 20(s0) ; RV32I-WITHFP-NEXT: lw a2, 0(s0) -; RV32I-WITHFP-NEXT: lw a3, 20(s0) +; RV32I-WITHFP-NEXT: lw a3, 8(s0) ; RV32I-WITHFP-NEXT: lw a4, 16(s0) ; RV32I-WITHFP-NEXT: add a0, a0, a7 -; RV32I-WITHFP-NEXT: add a1, a2, a1 -; RV32I-WITHFP-NEXT: add a0, a0, a1 -; RV32I-WITHFP-NEXT: add a3, a4, a3 +; RV32I-WITHFP-NEXT: add a0, a0, a2 ; RV32I-WITHFP-NEXT: add a0, a0, a3 +; RV32I-WITHFP-NEXT: add a0, a0, a4 +; RV32I-WITHFP-NEXT: add a0, a0, a1 ; RV32I-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-WITHFP-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll index bb082b0314d59..4386b5e21613c 100644 --- a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll @@ -87,16 +87,16 @@ define i32 @callee_many_scalars(i8 %a, i16 %b, i32 %c, i64 %d, i32 %e, i32 %f, i ; RV32I-FPELIM-NEXT: andi a0, a0, 255 ; RV32I-FPELIM-NEXT: slli a1, a1, 16 ; RV32I-FPELIM-NEXT: srli a1, a1, 16 -; RV32I-FPELIM-NEXT: add a0, a0, a2 ; RV32I-FPELIM-NEXT: add a0, a0, a1 +; RV32I-FPELIM-NEXT: add a0, a0, a2 ; RV32I-FPELIM-NEXT: xor a1, a4, t1 ; RV32I-FPELIM-NEXT: xor a2, a3, a7 ; RV32I-FPELIM-NEXT: or a1, a2, a1 ; RV32I-FPELIM-NEXT: seqz a1, a1 +; RV32I-FPELIM-NEXT: add a0, a1, a0 ; RV32I-FPELIM-NEXT: add a0, a0, a5 ; RV32I-FPELIM-NEXT: add a0, a0, a6 ; RV32I-FPELIM-NEXT: add a0, a0, t0 -; RV32I-FPELIM-NEXT: add a0, a1, a0 ; RV32I-FPELIM-NEXT: ret ; ; RV32I-WITHFP-LABEL: callee_many_scalars: @@ -110,16 +110,16 @@ define i32 @callee_many_scalars(i8 %a, i16 %b, i32 %c, i64 %d, i32 %e, i32 %f, i ; RV32I-WITHFP-NEXT: andi a0, a0, 255 ; RV32I-WITHFP-NEXT: slli a1, a1, 16 ; RV32I-WITHFP-NEXT: srli a1, a1, 16 -; RV32I-WITHFP-NEXT: add a0, a0, a2 ; RV32I-WITHFP-NEXT: add a0, a0, a1 +; RV32I-WITHFP-NEXT: add a0, a0, a2 ; RV32I-WITHFP-NEXT: xor a1, a4, t1 ; RV32I-WITHFP-NEXT: xor a2, a3, a7 ; RV32I-WITHFP-NEXT: or a1, a2, a1 ; RV32I-WITHFP-NEXT: seqz a1, a1 +; RV32I-WITHFP-NEXT: add a0, a1, a0 ; RV32I-WITHFP-NEXT: add a0, a0, a5 ; RV32I-WITHFP-NEXT: add a0, a0, a6 ; RV32I-WITHFP-NEXT: add a0, a0, t0 -; RV32I-WITHFP-NEXT: add a0, a1, a0 ; RV32I-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; 
RV32I-WITHFP-NEXT: addi sp, sp, 16 @@ -614,15 +614,15 @@ define i32 @callee_aligned_stack(i32 %a, i32 %b, fp128 %c, i32 %d, i32 %e, i64 % ; RV32I-FPELIM-LABEL: callee_aligned_stack: ; RV32I-FPELIM: # %bb.0: ; RV32I-FPELIM-NEXT: lw a0, 0(a2) -; RV32I-FPELIM-NEXT: lw a1, 8(sp) +; RV32I-FPELIM-NEXT: lw a1, 20(sp) ; RV32I-FPELIM-NEXT: lw a2, 0(sp) -; RV32I-FPELIM-NEXT: lw a3, 20(sp) +; RV32I-FPELIM-NEXT: lw a3, 8(sp) ; RV32I-FPELIM-NEXT: lw a4, 16(sp) ; RV32I-FPELIM-NEXT: add a0, a0, a7 -; RV32I-FPELIM-NEXT: add a1, a2, a1 -; RV32I-FPELIM-NEXT: add a0, a0, a1 -; RV32I-FPELIM-NEXT: add a3, a4, a3 +; RV32I-FPELIM-NEXT: add a0, a0, a2 ; RV32I-FPELIM-NEXT: add a0, a0, a3 +; RV32I-FPELIM-NEXT: add a0, a0, a4 +; RV32I-FPELIM-NEXT: add a0, a0, a1 ; RV32I-FPELIM-NEXT: ret ; ; RV32I-WITHFP-LABEL: callee_aligned_stack: @@ -632,15 +632,15 @@ define i32 @callee_aligned_stack(i32 %a, i32 %b, fp128 %c, i32 %d, i32 %e, i64 % ; RV32I-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-WITHFP-NEXT: addi s0, sp, 16 ; RV32I-WITHFP-NEXT: lw a0, 0(a2) -; RV32I-WITHFP-NEXT: lw a1, 8(s0) +; RV32I-WITHFP-NEXT: lw a1, 20(s0) ; RV32I-WITHFP-NEXT: lw a2, 0(s0) -; RV32I-WITHFP-NEXT: lw a3, 20(s0) +; RV32I-WITHFP-NEXT: lw a3, 8(s0) ; RV32I-WITHFP-NEXT: lw a4, 16(s0) ; RV32I-WITHFP-NEXT: add a0, a0, a7 -; RV32I-WITHFP-NEXT: add a1, a2, a1 -; RV32I-WITHFP-NEXT: add a0, a0, a1 -; RV32I-WITHFP-NEXT: add a3, a4, a3 +; RV32I-WITHFP-NEXT: add a0, a0, a2 ; RV32I-WITHFP-NEXT: add a0, a0, a3 +; RV32I-WITHFP-NEXT: add a0, a0, a4 +; RV32I-WITHFP-NEXT: add a0, a0, a1 ; RV32I-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-WITHFP-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll index 708cb00d1c45c..a67944cdd7fa1 100644 --- a/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll @@ -529,16 +529,16 @@ define i32 @callee_aligned_stack(i32 %a, i32 %b, fp128 %c, i32 %d, i32 %e, i64 % ; ILP32E-FPELIM-LABEL: callee_aligned_stack: ; ILP32E-FPELIM: # %bb.0: ; ILP32E-FPELIM-NEXT: lw a0, 0(a2) -; ILP32E-FPELIM-NEXT: lw a1, 12(sp) +; ILP32E-FPELIM-NEXT: lw a1, 24(sp) ; ILP32E-FPELIM-NEXT: lw a2, 4(sp) ; ILP32E-FPELIM-NEXT: lw a3, 8(sp) -; ILP32E-FPELIM-NEXT: lw a4, 24(sp) +; ILP32E-FPELIM-NEXT: lw a4, 12(sp) ; ILP32E-FPELIM-NEXT: lw a5, 20(sp) ; ILP32E-FPELIM-NEXT: add a0, a0, a2 -; ILP32E-FPELIM-NEXT: add a1, a3, a1 -; ILP32E-FPELIM-NEXT: add a0, a0, a1 -; ILP32E-FPELIM-NEXT: add a4, a5, a4 +; ILP32E-FPELIM-NEXT: add a0, a0, a3 ; ILP32E-FPELIM-NEXT: add a0, a0, a4 +; ILP32E-FPELIM-NEXT: add a0, a0, a5 +; ILP32E-FPELIM-NEXT: add a0, a0, a1 ; ILP32E-FPELIM-NEXT: ret ; ; ILP32E-WITHFP-LABEL: callee_aligned_stack: @@ -552,16 +552,16 @@ define i32 @callee_aligned_stack(i32 %a, i32 %b, fp128 %c, i32 %d, i32 %e, i64 % ; ILP32E-WITHFP-NEXT: addi s0, sp, 8 ; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-WITHFP-NEXT: lw a0, 0(a2) -; ILP32E-WITHFP-NEXT: lw a1, 12(s0) +; ILP32E-WITHFP-NEXT: lw a1, 24(s0) ; ILP32E-WITHFP-NEXT: lw a2, 4(s0) ; ILP32E-WITHFP-NEXT: lw a3, 8(s0) -; ILP32E-WITHFP-NEXT: lw a4, 24(s0) +; ILP32E-WITHFP-NEXT: lw a4, 12(s0) ; ILP32E-WITHFP-NEXT: lw a5, 20(s0) ; ILP32E-WITHFP-NEXT: add a0, a0, a2 -; ILP32E-WITHFP-NEXT: add a1, a3, a1 -; ILP32E-WITHFP-NEXT: add a0, a0, a1 -; ILP32E-WITHFP-NEXT: add a4, a5, a4 +; ILP32E-WITHFP-NEXT: add a0, a0, a3 ; ILP32E-WITHFP-NEXT: add a0, a0, a4 +; ILP32E-WITHFP-NEXT: add a0, a0, a5 +; 
ILP32E-WITHFP-NEXT: add a0, a0, a1 ; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload ; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload ; ILP32E-WITHFP-NEXT: addi sp, sp, 8 @@ -570,16 +570,16 @@ define i32 @callee_aligned_stack(i32 %a, i32 %b, fp128 %c, i32 %d, i32 %e, i64 % ; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_aligned_stack: ; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a0, 0(a2) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a1, 12(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a1, 24(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a2, 4(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a3, 8(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a4, 24(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a4, 12(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a5, 20(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a2 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a1, a3, a1 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a1 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a4, a5, a4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a3 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a5 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a1 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret ; ; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_aligned_stack: @@ -591,16 +591,16 @@ define i32 @callee_aligned_stack(i32 %a, i32 %b, fp128 %c, i32 %d, i32 %e, i64 % ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a0, 0(a2) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a1, 12(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a1, 24(s0) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a2, 4(s0) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a3, 8(s0) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a4, 24(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a4, 12(s0) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a5, 20(s0) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a2 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a1, a3, a1 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a1 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a4, a5, a4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a3 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a5 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a1 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 %1 = bitcast fp128 %c to i128 %2 = trunc i128 %1 to i32 @@ -1052,16 +1052,16 @@ define i32 @callee_many_scalars(i8 %a, i16 %b, i32 %c, i64 %d, i32 %e, i32 %f, i ; ILP32E-FPELIM-NEXT: andi a0, a0, 255 ; ILP32E-FPELIM-NEXT: slli a1, a1, 16 ; ILP32E-FPELIM-NEXT: srli a1, a1, 16 -; ILP32E-FPELIM-NEXT: add a0, a0, a2 ; ILP32E-FPELIM-NEXT: add a0, a0, a1 +; ILP32E-FPELIM-NEXT: add a0, a0, a2 ; ILP32E-FPELIM-NEXT: xor a1, a4, t1 ; ILP32E-FPELIM-NEXT: xor a2, a3, t0 ; ILP32E-FPELIM-NEXT: or a1, a2, a1 ; ILP32E-FPELIM-NEXT: seqz a1, a1 +; ILP32E-FPELIM-NEXT: add a0, a1, a0 ; ILP32E-FPELIM-NEXT: add a0, a0, a5 ; ILP32E-FPELIM-NEXT: add a0, a0, a7 ; ILP32E-FPELIM-NEXT: add a0, a0, a6 -; ILP32E-FPELIM-NEXT: add a0, a1, a0 ; ILP32E-FPELIM-NEXT: ret ; ; ILP32E-WITHFP-LABEL: callee_many_scalars: @@ -1081,16 +1081,16 @@ define i32 @callee_many_scalars(i8 %a, i16 %b, i32 %c, i64 %d, i32 %e, i32 %f, i ; ILP32E-WITHFP-NEXT: andi a0, a0, 255 ; ILP32E-WITHFP-NEXT: slli a1, a1, 16 ; ILP32E-WITHFP-NEXT: srli a1, a1, 16 -; ILP32E-WITHFP-NEXT: add a0, a0, a2 ; ILP32E-WITHFP-NEXT: add a0, a0, a1 +; ILP32E-WITHFP-NEXT: add a0, a0, a2 ; ILP32E-WITHFP-NEXT: xor a1, a4, t1 ; ILP32E-WITHFP-NEXT: xor a2, a3, t0 ; 
ILP32E-WITHFP-NEXT: or a1, a2, a1 ; ILP32E-WITHFP-NEXT: seqz a1, a1 +; ILP32E-WITHFP-NEXT: add a0, a1, a0 ; ILP32E-WITHFP-NEXT: add a0, a0, a5 ; ILP32E-WITHFP-NEXT: add a0, a0, a7 ; ILP32E-WITHFP-NEXT: add a0, a0, a6 -; ILP32E-WITHFP-NEXT: add a0, a1, a0 ; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload ; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload ; ILP32E-WITHFP-NEXT: addi sp, sp, 8 @@ -1105,16 +1105,16 @@ define i32 @callee_many_scalars(i8 %a, i16 %b, i32 %c, i64 %d, i32 %e, i32 %f, i ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: andi a0, a0, 255 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: slli a1, a1, 16 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: srli a1, a1, 16 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a2 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a2 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a1, a4, t1 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a2, a3, t0 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a1, a2, a1 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: seqz a1, a1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a1, a0 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a5 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a7 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a6 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a1, a0 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret ; ; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_many_scalars: @@ -1132,16 +1132,16 @@ define i32 @callee_many_scalars(i8 %a, i16 %b, i32 %c, i64 %d, i32 %e, i32 %f, i ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: andi a0, a0, 255 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: slli a1, a1, 16 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: srli a1, a1, 16 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a2 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a2 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a1, a4, t1 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a2, a3, t0 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a1, a2, a1 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: seqz a1, a1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a1, a0 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a5 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a7 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a6 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a1, a0 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 %a_ext = zext i8 %a to i32 %b_ext = zext i16 %b to i32 diff --git a/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll b/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll index a0e1b002b7260..f5706ea871272 100644 --- a/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll @@ -53,16 +53,16 @@ define i32 @callee_many_scalars(i8 %a, i16 %b, i32 %c, i128 %d, i32 %e, i32 %f, ; RV64I-NEXT: andi a0, a0, 255 ; RV64I-NEXT: slli a1, a1, 48 ; RV64I-NEXT: srli a1, a1, 48 -; RV64I-NEXT: add a0, a0, a2 ; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: add a0, a0, a2 ; RV64I-NEXT: xor a1, a4, t1 ; RV64I-NEXT: xor a2, a3, a7 ; RV64I-NEXT: or a1, a2, a1 ; RV64I-NEXT: seqz a1, a1 +; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: add a0, a0, a5 ; RV64I-NEXT: add a0, a0, a6 -; RV64I-NEXT: add a0, a0, t0 -; RV64I-NEXT: addw a0, a1, a0 +; RV64I-NEXT: addw a0, a0, t0 ; RV64I-NEXT: ret %a_ext = zext i8 %a to i32 %b_ext = zext i16 %b to i32 @@ -328,15 +328,15 @@ define i64 @callee_aligned_stack(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i128 %f ; should only be 8-byte aligned ; RV64I-LABEL: callee_aligned_stack: ; RV64I: # %bb.0: -; RV64I-NEXT: ld a0, 32(sp) +; RV64I-NEXT: ld a0, 40(sp) 
; RV64I-NEXT: ld a1, 0(sp) ; RV64I-NEXT: ld a2, 16(sp) -; RV64I-NEXT: ld a3, 40(sp) +; RV64I-NEXT: ld a3, 32(sp) ; RV64I-NEXT: add a5, a5, a7 ; RV64I-NEXT: add a1, a5, a1 -; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: add a1, a1, a3 ; RV64I-NEXT: add a0, a1, a0 -; RV64I-NEXT: add a0, a0, a3 ; RV64I-NEXT: ret %f_trunc = trunc i128 %f to i64 %1 = add i64 %f_trunc, %g diff --git a/llvm/test/CodeGen/RISCV/compress.ll b/llvm/test/CodeGen/RISCV/compress.ll index 8fb520fac41ee..e461ef5ad679d 100644 --- a/llvm/test/CodeGen/RISCV/compress.ll +++ b/llvm/test/CodeGen/RISCV/compress.ll @@ -35,8 +35,8 @@ define i32 @simple_arith(i32 %a, i32 %b) #0 { ; RV32IC-NEXT: c.andi a2, 0xb ; RV32IC-NEXT: c.slli a2, 0x7 ; RV32IC-NEXT: c.srai a1, 0x9 +; RV32IC-NEXT: c.add a1, a2 ; RV32IC-NEXT: sub a0, a1, a0 -; RV32IC-NEXT: c.add a0, a2 ; RV32IC-NEXT: c.jr ra %1 = add i32 %a, 1 %2 = and i32 %1, 11 diff --git a/llvm/test/CodeGen/RISCV/copysign-casts.ll b/llvm/test/CodeGen/RISCV/copysign-casts.ll index d8019c0ad6112..dcd71457c9cca 100644 --- a/llvm/test/CodeGen/RISCV/copysign-casts.ll +++ b/llvm/test/CodeGen/RISCV/copysign-casts.ll @@ -587,9 +587,9 @@ define half @fold_demote_h_d(half %a, double %b) nounwind { ; RV32IFD-NEXT: srli a0, a0, 16 ; RV32IFD-NEXT: slli a1, a1, 17 ; RV32IFD-NEXT: srli a1, a1, 17 -; RV32IFD-NEXT: lui a2, 1048560 -; RV32IFD-NEXT: or a1, a1, a2 ; RV32IFD-NEXT: or a0, a1, a0 +; RV32IFD-NEXT: lui a1, 1048560 +; RV32IFD-NEXT: or a0, a0, a1 ; RV32IFD-NEXT: fmv.w.x fa0, a0 ; RV32IFD-NEXT: addi sp, sp, 16 ; RV32IFD-NEXT: ret @@ -603,9 +603,9 @@ define half @fold_demote_h_d(half %a, double %b) nounwind { ; RV64IFD-NEXT: srli a0, a0, 63 ; RV64IFD-NEXT: slli a0, a0, 63 ; RV64IFD-NEXT: srli a0, a0, 48 -; RV64IFD-NEXT: lui a2, 1048560 -; RV64IFD-NEXT: or a1, a1, a2 ; RV64IFD-NEXT: or a0, a1, a0 +; RV64IFD-NEXT: lui a1, 1048560 +; RV64IFD-NEXT: or a0, a0, a1 ; RV64IFD-NEXT: fmv.w.x fa0, a0 ; RV64IFD-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/div-pow2.ll b/llvm/test/CodeGen/RISCV/div-pow2.ll index 254e675b4ed8b..ba621631823ad 100644 --- a/llvm/test/CodeGen/RISCV/div-pow2.ll +++ b/llvm/test/CodeGen/RISCV/div-pow2.ll @@ -213,8 +213,8 @@ define i64 @sdiv64_pow2_negative_2(i64 %a) { ; RV32I-NEXT: neg a0, a3 ; RV32I-NEXT: snez a2, a3 ; RV32I-NEXT: srai a1, a1, 1 +; RV32I-NEXT: add a1, a1, a2 ; RV32I-NEXT: neg a1, a1 -; RV32I-NEXT: sub a1, a1, a2 ; RV32I-NEXT: ret ; ; RV64I-LABEL: sdiv64_pow2_negative_2: @@ -269,8 +269,8 @@ define i64 @sdiv64_pow2_negative_2048(i64 %a) { ; RV32I-NEXT: neg a0, a3 ; RV32I-NEXT: snez a2, a3 ; RV32I-NEXT: srai a1, a1, 11 +; RV32I-NEXT: add a1, a1, a2 ; RV32I-NEXT: neg a1, a1 -; RV32I-NEXT: sub a1, a1, a2 ; RV32I-NEXT: ret ; ; RV64I-LABEL: sdiv64_pow2_negative_2048: @@ -326,8 +326,8 @@ define i64 @sdiv64_pow2_negative_4096(i64 %a) { ; RV32I-NEXT: neg a0, a3 ; RV32I-NEXT: snez a2, a3 ; RV32I-NEXT: srai a1, a1, 12 +; RV32I-NEXT: add a1, a1, a2 ; RV32I-NEXT: neg a1, a1 -; RV32I-NEXT: sub a1, a1, a2 ; RV32I-NEXT: ret ; ; RV64I-LABEL: sdiv64_pow2_negative_4096: @@ -383,8 +383,8 @@ define i64 @sdiv64_pow2_negative_65536(i64 %a) { ; RV32I-NEXT: neg a0, a3 ; RV32I-NEXT: snez a2, a3 ; RV32I-NEXT: srai a1, a1, 16 +; RV32I-NEXT: add a1, a1, a2 ; RV32I-NEXT: neg a1, a1 -; RV32I-NEXT: sub a1, a1, a2 ; RV32I-NEXT: ret ; ; RV64I-LABEL: sdiv64_pow2_negative_65536: @@ -437,8 +437,8 @@ define i64 @sdiv64_pow2_negative_8589934592(i64 %a) { ; RV32I-NEXT: srai a1, a0, 31 ; RV32I-NEXT: srai a0, a0, 1 ; RV32I-NEXT: snez a2, a0 +; RV32I-NEXT: add a1, a1, a2 ; RV32I-NEXT: neg 
a1, a1 -; RV32I-NEXT: sub a1, a1, a2 ; RV32I-NEXT: neg a0, a0 ; RV32I-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/float-intrinsics.ll b/llvm/test/CodeGen/RISCV/float-intrinsics.ll index b05eac9c9dee2..fa8f789c2c9fb 100644 --- a/llvm/test/CodeGen/RISCV/float-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/float-intrinsics.ll @@ -1611,12 +1611,12 @@ define i1 @fpclass(float %x) { ; RV32I-NEXT: slti a0, a0, 0 ; RV32I-NEXT: and a2, a2, a0 ; RV32I-NEXT: seqz a3, a1 -; RV32I-NEXT: lui a4, 522240 -; RV32I-NEXT: xor a5, a1, a4 -; RV32I-NEXT: seqz a5, a5 -; RV32I-NEXT: or a3, a3, a5 ; RV32I-NEXT: or a2, a3, a2 -; RV32I-NEXT: slt a3, a4, a1 +; RV32I-NEXT: lui a3, 522240 +; RV32I-NEXT: xor a4, a1, a3 +; RV32I-NEXT: seqz a4, a4 +; RV32I-NEXT: or a2, a2, a4 +; RV32I-NEXT: slt a3, a3, a1 ; RV32I-NEXT: or a2, a2, a3 ; RV32I-NEXT: lui a3, 1046528 ; RV32I-NEXT: add a1, a1, a3 @@ -1638,12 +1638,12 @@ define i1 @fpclass(float %x) { ; RV64I-NEXT: slti a1, a1, 0 ; RV64I-NEXT: and a2, a2, a1 ; RV64I-NEXT: seqz a3, a0 -; RV64I-NEXT: lui a4, 522240 -; RV64I-NEXT: xor a5, a0, a4 -; RV64I-NEXT: seqz a5, a5 -; RV64I-NEXT: or a3, a3, a5 ; RV64I-NEXT: or a2, a3, a2 -; RV64I-NEXT: slt a3, a4, a0 +; RV64I-NEXT: lui a3, 522240 +; RV64I-NEXT: xor a4, a0, a3 +; RV64I-NEXT: seqz a4, a4 +; RV64I-NEXT: or a2, a2, a4 +; RV64I-NEXT: slt a3, a3, a0 ; RV64I-NEXT: or a2, a2, a3 ; RV64I-NEXT: lui a3, 1046528 ; RV64I-NEXT: add a0, a0, a3 diff --git a/llvm/test/CodeGen/RISCV/iabs.ll b/llvm/test/CodeGen/RISCV/iabs.ll index a0c85ab4dca7f..479ac1c54e90f 100644 --- a/llvm/test/CodeGen/RISCV/iabs.ll +++ b/llvm/test/CodeGen/RISCV/iabs.ll @@ -225,8 +225,8 @@ define i64 @abs64(i64 %x) { ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: snez a2, a0 ; RV32I-NEXT: neg a0, a0 +; RV32I-NEXT: add a1, a1, a2 ; RV32I-NEXT: neg a1, a1 -; RV32I-NEXT: sub a1, a1, a2 ; RV32I-NEXT: .LBB6_2: ; RV32I-NEXT: ret ; @@ -236,8 +236,8 @@ define i64 @abs64(i64 %x) { ; RV32ZBB-NEXT: # %bb.1: ; RV32ZBB-NEXT: snez a2, a0 ; RV32ZBB-NEXT: neg a0, a0 +; RV32ZBB-NEXT: add a1, a1, a2 ; RV32ZBB-NEXT: neg a1, a1 -; RV32ZBB-NEXT: sub a1, a1, a2 ; RV32ZBB-NEXT: .LBB6_2: ; RV32ZBB-NEXT: ret ; @@ -264,8 +264,8 @@ define i64 @select_abs64(i64 %x) { ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: snez a2, a0 ; RV32I-NEXT: neg a0, a0 +; RV32I-NEXT: add a1, a1, a2 ; RV32I-NEXT: neg a1, a1 -; RV32I-NEXT: sub a1, a1, a2 ; RV32I-NEXT: .LBB7_2: ; RV32I-NEXT: ret ; @@ -275,8 +275,8 @@ define i64 @select_abs64(i64 %x) { ; RV32ZBB-NEXT: # %bb.1: ; RV32ZBB-NEXT: snez a2, a0 ; RV32ZBB-NEXT: neg a0, a0 +; RV32ZBB-NEXT: add a1, a1, a2 ; RV32ZBB-NEXT: neg a1, a1 -; RV32ZBB-NEXT: sub a1, a1, a2 ; RV32ZBB-NEXT: .LBB7_2: ; RV32ZBB-NEXT: ret ; @@ -314,11 +314,11 @@ define i128 @abs128(i128 %x) { ; RV32I-NEXT: sltu t0, a5, a6 ; RV32I-NEXT: snez a1, a1 ; RV32I-NEXT: add a1, a2, a1 -; RV32I-NEXT: neg a1, a1 -; RV32I-NEXT: sub a2, a1, t0 +; RV32I-NEXT: add a1, a1, t0 +; RV32I-NEXT: neg a2, a1 ; RV32I-NEXT: sub a1, a5, a6 +; RV32I-NEXT: add a4, a4, a7 ; RV32I-NEXT: neg a4, a4 -; RV32I-NEXT: sub a4, a4, a7 ; RV32I-NEXT: neg a3, a3 ; RV32I-NEXT: .LBB8_2: ; RV32I-NEXT: sw a3, 0(a0) @@ -342,11 +342,11 @@ define i128 @abs128(i128 %x) { ; RV32ZBB-NEXT: sltu t0, a5, a6 ; RV32ZBB-NEXT: snez a1, a1 ; RV32ZBB-NEXT: add a1, a2, a1 -; RV32ZBB-NEXT: neg a1, a1 -; RV32ZBB-NEXT: sub a2, a1, t0 +; RV32ZBB-NEXT: add a1, a1, t0 +; RV32ZBB-NEXT: neg a2, a1 ; RV32ZBB-NEXT: sub a1, a5, a6 +; RV32ZBB-NEXT: add a4, a4, a7 ; RV32ZBB-NEXT: neg a4, a4 -; RV32ZBB-NEXT: sub a4, a4, a7 ; RV32ZBB-NEXT: neg a3, a3 ; RV32ZBB-NEXT: .LBB8_2: ; RV32ZBB-NEXT: 
sw a3, 0(a0) @@ -361,8 +361,8 @@ define i128 @abs128(i128 %x) { ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: snez a2, a0 ; RV64I-NEXT: neg a0, a0 +; RV64I-NEXT: add a1, a1, a2 ; RV64I-NEXT: neg a1, a1 -; RV64I-NEXT: sub a1, a1, a2 ; RV64I-NEXT: .LBB8_2: ; RV64I-NEXT: ret ; @@ -372,8 +372,8 @@ define i128 @abs128(i128 %x) { ; RV64ZBB-NEXT: # %bb.1: ; RV64ZBB-NEXT: snez a2, a0 ; RV64ZBB-NEXT: neg a0, a0 +; RV64ZBB-NEXT: add a1, a1, a2 ; RV64ZBB-NEXT: neg a1, a1 -; RV64ZBB-NEXT: sub a1, a1, a2 ; RV64ZBB-NEXT: .LBB8_2: ; RV64ZBB-NEXT: ret %abs = tail call i128 @llvm.abs.i128(i128 %x, i1 true) @@ -396,11 +396,11 @@ define i128 @select_abs128(i128 %x) { ; RV32I-NEXT: sltu t0, a5, a6 ; RV32I-NEXT: snez a1, a1 ; RV32I-NEXT: add a1, a2, a1 -; RV32I-NEXT: neg a1, a1 -; RV32I-NEXT: sub a2, a1, t0 +; RV32I-NEXT: add a1, a1, t0 +; RV32I-NEXT: neg a2, a1 ; RV32I-NEXT: sub a1, a5, a6 +; RV32I-NEXT: add a4, a4, a7 ; RV32I-NEXT: neg a4, a4 -; RV32I-NEXT: sub a4, a4, a7 ; RV32I-NEXT: neg a3, a3 ; RV32I-NEXT: .LBB9_2: ; RV32I-NEXT: sw a3, 0(a0) @@ -424,11 +424,11 @@ define i128 @select_abs128(i128 %x) { ; RV32ZBB-NEXT: sltu t0, a5, a6 ; RV32ZBB-NEXT: snez a1, a1 ; RV32ZBB-NEXT: add a1, a2, a1 -; RV32ZBB-NEXT: neg a1, a1 -; RV32ZBB-NEXT: sub a2, a1, t0 +; RV32ZBB-NEXT: add a1, a1, t0 +; RV32ZBB-NEXT: neg a2, a1 ; RV32ZBB-NEXT: sub a1, a5, a6 +; RV32ZBB-NEXT: add a4, a4, a7 ; RV32ZBB-NEXT: neg a4, a4 -; RV32ZBB-NEXT: sub a4, a4, a7 ; RV32ZBB-NEXT: neg a3, a3 ; RV32ZBB-NEXT: .LBB9_2: ; RV32ZBB-NEXT: sw a3, 0(a0) @@ -443,8 +443,8 @@ define i128 @select_abs128(i128 %x) { ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: snez a2, a0 ; RV64I-NEXT: neg a0, a0 +; RV64I-NEXT: add a1, a1, a2 ; RV64I-NEXT: neg a1, a1 -; RV64I-NEXT: sub a1, a1, a2 ; RV64I-NEXT: .LBB9_2: ; RV64I-NEXT: ret ; @@ -454,8 +454,8 @@ define i128 @select_abs128(i128 %x) { ; RV64ZBB-NEXT: # %bb.1: ; RV64ZBB-NEXT: snez a2, a0 ; RV64ZBB-NEXT: neg a0, a0 +; RV64ZBB-NEXT: add a1, a1, a2 ; RV64ZBB-NEXT: neg a1, a1 -; RV64ZBB-NEXT: sub a1, a1, a2 ; RV64ZBB-NEXT: .LBB9_2: ; RV64ZBB-NEXT: ret %1 = icmp slt i128 %x, 0 diff --git a/llvm/test/CodeGen/RISCV/machine-combiner.mir b/llvm/test/CodeGen/RISCV/machine-combiner.mir index e110dd9985f63..463f7cd90f7ba 100644 --- a/llvm/test/CodeGen/RISCV/machine-combiner.mir +++ b/llvm/test/CodeGen/RISCV/machine-combiner.mir @@ -64,10 +64,11 @@ body: | ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gpr = COPY $x10 ; CHECK-NEXT: [[FLW:%[0-9]+]]:fpr32 = FLW [[COPY5]], 0 :: (load (s32) from %ir.0) ; CHECK-NEXT: [[FLW1:%[0-9]+]]:fpr32 = FLW [[COPY4]], 0 :: (load (s32) from %ir.1) - ; CHECK-NEXT: [[FMADD_S:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FMADD_S [[FLW1]], [[FLW]], [[COPY3]], 7, implicit $frm - ; CHECK-NEXT: FSW killed [[FMADD_S]], [[COPY1]], 0 :: (store (s32) into %ir.4) - ; CHECK-NEXT: [[FNMSUB_S:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FNMSUB_S [[FLW1]], [[FLW]], [[COPY2]], 7, implicit $frm - ; CHECK-NEXT: FSW killed [[FNMSUB_S]], [[COPY]], 0 :: (store (s32) into %ir.5) + ; CHECK-NEXT: [[FMUL_S:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FMUL_S [[FLW1]], [[FLW]], 7, implicit $frm + ; CHECK-NEXT: [[FADD_S:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADD_S [[FMUL_S]], [[COPY3]], 7, implicit $frm + ; CHECK-NEXT: FSW killed [[FADD_S]], [[COPY1]], 0 :: (store (s32) into %ir.4) + ; CHECK-NEXT: [[FSUB_S:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FSUB_S [[COPY2]], [[FMUL_S]], 7, implicit $frm + ; CHECK-NEXT: FSW killed 
[[FSUB_S]], [[COPY]], 0 :: (store (s32) into %ir.5) ; CHECK-NEXT: PseudoRET %5:gpr = COPY $x13 %4:gpr = COPY $x12 diff --git a/llvm/test/CodeGen/RISCV/misched-load-clustering.ll b/llvm/test/CodeGen/RISCV/misched-load-clustering.ll index cf290a0b8682d..352151fb63aaf 100644 --- a/llvm/test/CodeGen/RISCV/misched-load-clustering.ll +++ b/llvm/test/CodeGen/RISCV/misched-load-clustering.ll @@ -20,15 +20,15 @@ define i32 @load_clustering_1(ptr nocapture %p) { ; NOCLUSTER: SU(1): %1:gpr = LW %0:gpr, 12 ; NOCLUSTER: SU(2): %2:gpr = LW %0:gpr, 8 ; NOCLUSTER: SU(4): %4:gpr = LW %0:gpr, 4 -; NOCLUSTER: SU(5): %6:gpr = LW %0:gpr, 16 +; NOCLUSTER: SU(6): %6:gpr = LW %0:gpr, 16 ; ; LDCLUSTER: ********** MI Scheduling ********** ; LDCLUSTER-LABEL: load_clustering_1:%bb.0 ; LDCLUSTER: *** Final schedule for %bb.0 *** -; LDCLUSTER: SU(4): %4:gpr = LW %0:gpr, 4 ; LDCLUSTER: SU(2): %2:gpr = LW %0:gpr, 8 ; LDCLUSTER: SU(1): %1:gpr = LW %0:gpr, 12 -; LDCLUSTER: SU(5): %6:gpr = LW %0:gpr, 16 +; LDCLUSTER: SU(4): %4:gpr = LW %0:gpr, 4 +; LDCLUSTER: SU(6): %6:gpr = LW %0:gpr, 16 entry: %arrayidx0 = getelementptr inbounds i32, ptr %p, i32 3 %val0 = load i32, ptr %arrayidx0 diff --git a/llvm/test/CodeGen/RISCV/mul.ll b/llvm/test/CodeGen/RISCV/mul.ll index e9b84b3cd97ed..0f6924c5a2556 100644 --- a/llvm/test/CodeGen/RISCV/mul.ll +++ b/llvm/test/CodeGen/RISCV/mul.ll @@ -299,8 +299,8 @@ define i32 @mulhs_negative_constant(i32 %a) nounwind { ; RV32I-NEXT: slli a4, a1, 2 ; RV32I-NEXT: or a0, a4, a0 ; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: add a0, a0, a2 ; RV32I-NEXT: snez a1, a3 -; RV32I-NEXT: add a1, a2, a1 ; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: neg a0, a0 ; RV32I-NEXT: ret @@ -315,8 +315,8 @@ define i32 @mulhs_negative_constant(i32 %a) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: slli a1, a0, 2 +; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: neg a0, a0 -; RV64I-NEXT: sub a0, a0, a1 ; RV64I-NEXT: srli a0, a0, 32 ; RV64I-NEXT: ret ; @@ -324,8 +324,8 @@ define i32 @mulhs_negative_constant(i32 %a) nounwind { ; RV64IM: # %bb.0: ; RV64IM-NEXT: sext.w a0, a0 ; RV64IM-NEXT: slli a1, a0, 2 +; RV64IM-NEXT: add a0, a1, a0 ; RV64IM-NEXT: neg a0, a0 -; RV64IM-NEXT: sub a0, a0, a1 ; RV64IM-NEXT: srli a0, a0, 32 ; RV64IM-NEXT: ret %1 = sext i32 %a to i64 @@ -814,29 +814,29 @@ define i32 @muli32_m65(i32 %a) nounwind { ; RV32I-LABEL: muli32_m65: ; RV32I: # %bb.0: ; RV32I-NEXT: slli a1, a0, 6 +; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: neg a0, a0 -; RV32I-NEXT: sub a0, a0, a1 ; RV32I-NEXT: ret ; ; RV32IM-LABEL: muli32_m65: ; RV32IM: # %bb.0: ; RV32IM-NEXT: slli a1, a0, 6 +; RV32IM-NEXT: add a0, a1, a0 ; RV32IM-NEXT: neg a0, a0 -; RV32IM-NEXT: sub a0, a0, a1 ; RV32IM-NEXT: ret ; ; RV64I-LABEL: muli32_m65: ; RV64I: # %bb.0: ; RV64I-NEXT: slli a1, a0, 6 +; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: negw a0, a0 -; RV64I-NEXT: subw a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: muli32_m65: ; RV64IM: # %bb.0: ; RV64IM-NEXT: slli a1, a0, 6 +; RV64IM-NEXT: add a0, a1, a0 ; RV64IM-NEXT: negw a0, a0 -; RV64IM-NEXT: subw a0, a0, a1 ; RV64IM-NEXT: ret %1 = mul i32 %a, -65 ret i32 %1 @@ -894,8 +894,8 @@ define i64 @muli64_m65(i64 %a) nounwind { ; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: add a0, a0, a2 ; RV32I-NEXT: snez a1, a3 -; RV32I-NEXT: neg a1, a1 -; RV32I-NEXT: sub a1, a1, a0 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: neg a1, a0 ; RV32I-NEXT: neg a0, a3 ; RV32I-NEXT: ret ; @@ -908,22 +908,22 @@ define i64 @muli64_m65(i64 %a) nounwind { ; RV32IM-NEXT: sub a2, a2, a0 ; RV32IM-NEXT: sub a1, a2, a1 ; RV32IM-NEXT: slli 
a2, a0, 6 +; RV32IM-NEXT: add a0, a2, a0 ; RV32IM-NEXT: neg a0, a0 -; RV32IM-NEXT: sub a0, a0, a2 ; RV32IM-NEXT: ret ; ; RV64I-LABEL: muli64_m65: ; RV64I: # %bb.0: ; RV64I-NEXT: slli a1, a0, 6 +; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: neg a0, a0 -; RV64I-NEXT: sub a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: muli64_m65: ; RV64IM: # %bb.0: ; RV64IM-NEXT: slli a1, a0, 6 +; RV64IM-NEXT: add a0, a1, a0 ; RV64IM-NEXT: neg a0, a0 -; RV64IM-NEXT: sub a0, a0, a1 ; RV64IM-NEXT: ret %1 = mul i64 %a, -65 ret i64 %1 @@ -1386,9 +1386,9 @@ define i128 @muli128_m3840(i128 %a) nounwind { ; RV32IM-NEXT: mulhu t1, a4, a5 ; RV32IM-NEXT: sub a4, t1, a4 ; RV32IM-NEXT: add a1, a4, a1 -; RV32IM-NEXT: add a3, a2, a3 -; RV32IM-NEXT: sub a3, t3, a3 -; RV32IM-NEXT: add a1, a3, a1 +; RV32IM-NEXT: sub a4, t3, a2 +; RV32IM-NEXT: sub a4, a4, a3 +; RV32IM-NEXT: add a1, a4, a1 ; RV32IM-NEXT: add a1, a1, t0 ; RV32IM-NEXT: add a1, a7, a1 ; RV32IM-NEXT: add a1, a1, s0 @@ -1497,8 +1497,8 @@ define i128 @muli128_m63(i128 %a) nounwind { ; RV32IM-NEXT: add t1, a7, t1 ; RV32IM-NEXT: sub t4, t1, a3 ; RV32IM-NEXT: slli t5, a4, 6 -; RV32IM-NEXT: sub t6, a4, a2 -; RV32IM-NEXT: sub t5, t6, t5 +; RV32IM-NEXT: sub t5, a4, t5 +; RV32IM-NEXT: sub t5, t5, a2 ; RV32IM-NEXT: add t6, t4, t5 ; RV32IM-NEXT: sltu s0, t6, t4 ; RV32IM-NEXT: neg s1, a3 @@ -1513,9 +1513,9 @@ define i128 @muli128_m63(i128 %a) nounwind { ; RV32IM-NEXT: mulhu a5, a4, a5 ; RV32IM-NEXT: sub a5, a5, a4 ; RV32IM-NEXT: add a1, a5, a1 -; RV32IM-NEXT: add a3, a2, a3 -; RV32IM-NEXT: sub a3, t3, a3 -; RV32IM-NEXT: add a1, a3, a1 +; RV32IM-NEXT: sub a4, t3, a2 +; RV32IM-NEXT: sub a4, a4, a3 +; RV32IM-NEXT: add a1, a4, a1 ; RV32IM-NEXT: add a1, a1, t0 ; RV32IM-NEXT: add a1, a7, a1 ; RV32IM-NEXT: add a1, a1, s0 diff --git a/llvm/test/CodeGen/RISCV/neg-abs.ll b/llvm/test/CodeGen/RISCV/neg-abs.ll index 6f301882b452c..b717b71e1ce96 100644 --- a/llvm/test/CodeGen/RISCV/neg-abs.ll +++ b/llvm/test/CodeGen/RISCV/neg-abs.ll @@ -204,14 +204,14 @@ define i64 @neg_abs64_multiuse(i64 %x, ptr %y) { ; RV32I-NEXT: bgez a1, .LBB5_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: snez a3, a0 +; RV32I-NEXT: add a1, a1, a3 ; RV32I-NEXT: neg a1, a1 -; RV32I-NEXT: sub a1, a1, a3 ; RV32I-NEXT: neg a0, a0 ; RV32I-NEXT: .LBB5_2: ; RV32I-NEXT: sw a0, 0(a2) ; RV32I-NEXT: snez a3, a0 -; RV32I-NEXT: neg a4, a1 -; RV32I-NEXT: sub a3, a4, a3 +; RV32I-NEXT: add a3, a1, a3 +; RV32I-NEXT: neg a3, a3 ; RV32I-NEXT: neg a0, a0 ; RV32I-NEXT: sw a1, 4(a2) ; RV32I-NEXT: mv a1, a3 @@ -222,14 +222,14 @@ define i64 @neg_abs64_multiuse(i64 %x, ptr %y) { ; RV32ZBB-NEXT: bgez a1, .LBB5_2 ; RV32ZBB-NEXT: # %bb.1: ; RV32ZBB-NEXT: snez a3, a0 +; RV32ZBB-NEXT: add a1, a1, a3 ; RV32ZBB-NEXT: neg a1, a1 -; RV32ZBB-NEXT: sub a1, a1, a3 ; RV32ZBB-NEXT: neg a0, a0 ; RV32ZBB-NEXT: .LBB5_2: ; RV32ZBB-NEXT: sw a0, 0(a2) ; RV32ZBB-NEXT: snez a3, a0 -; RV32ZBB-NEXT: neg a4, a1 -; RV32ZBB-NEXT: sub a3, a4, a3 +; RV32ZBB-NEXT: add a3, a1, a3 +; RV32ZBB-NEXT: neg a3, a3 ; RV32ZBB-NEXT: neg a0, a0 ; RV32ZBB-NEXT: sw a1, 4(a2) ; RV32ZBB-NEXT: mv a1, a3 diff --git a/llvm/test/CodeGen/RISCV/reduction-formation.ll b/llvm/test/CodeGen/RISCV/reduction-formation.ll index ced3a38ab5ea0..0a18fff6b346c 100644 --- a/llvm/test/CodeGen/RISCV/reduction-formation.ll +++ b/llvm/test/CodeGen/RISCV/reduction-formation.ll @@ -13,7 +13,7 @@ define i32 @reduce_sum_4xi32(<4 x i32> %v) { ; RV32-NEXT: lw a3, 8(a0) ; RV32-NEXT: lw a0, 12(a0) ; RV32-NEXT: add a1, a1, a2 -; RV32-NEXT: add a0, a3, a0 +; RV32-NEXT: add a1, a1, a3 ; RV32-NEXT: add a0, a1, a0 ; RV32-NEXT: ret ; 
@@ -24,7 +24,7 @@ define i32 @reduce_sum_4xi32(<4 x i32> %v) { ; RV64-NEXT: lw a3, 16(a0) ; RV64-NEXT: lw a0, 24(a0) ; RV64-NEXT: add a1, a1, a2 -; RV64-NEXT: add a0, a3, a0 +; RV64-NEXT: add a1, a1, a3 ; RV64-NEXT: addw a0, a1, a0 ; RV64-NEXT: ret %e0 = extractelement <4 x i32> %v, i32 0 @@ -45,7 +45,7 @@ define i32 @reduce_xor_4xi32(<4 x i32> %v) { ; RV32-NEXT: lw a3, 8(a0) ; RV32-NEXT: lw a0, 12(a0) ; RV32-NEXT: xor a1, a1, a2 -; RV32-NEXT: xor a0, a3, a0 +; RV32-NEXT: xor a1, a1, a3 ; RV32-NEXT: xor a0, a1, a0 ; RV32-NEXT: ret ; @@ -56,7 +56,7 @@ define i32 @reduce_xor_4xi32(<4 x i32> %v) { ; RV64-NEXT: ld a3, 16(a0) ; RV64-NEXT: ld a0, 24(a0) ; RV64-NEXT: xor a1, a1, a2 -; RV64-NEXT: xor a0, a3, a0 +; RV64-NEXT: xor a1, a1, a3 ; RV64-NEXT: xor a0, a1, a0 ; RV64-NEXT: ret %e0 = extractelement <4 x i32> %v, i32 0 @@ -77,7 +77,7 @@ define i32 @reduce_or_4xi32(<4 x i32> %v) { ; RV32-NEXT: lw a3, 8(a0) ; RV32-NEXT: lw a0, 12(a0) ; RV32-NEXT: or a1, a1, a2 -; RV32-NEXT: or a0, a3, a0 +; RV32-NEXT: or a1, a1, a3 ; RV32-NEXT: or a0, a1, a0 ; RV32-NEXT: ret ; @@ -88,7 +88,7 @@ define i32 @reduce_or_4xi32(<4 x i32> %v) { ; RV64-NEXT: ld a3, 16(a0) ; RV64-NEXT: ld a0, 24(a0) ; RV64-NEXT: or a1, a1, a2 -; RV64-NEXT: or a0, a3, a0 +; RV64-NEXT: or a1, a1, a3 ; RV64-NEXT: or a0, a1, a0 ; RV64-NEXT: ret %e0 = extractelement <4 x i32> %v, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rv32e.ll b/llvm/test/CodeGen/RISCV/rv32e.ll index ff73dd216da22..c68a465b96420 100644 --- a/llvm/test/CodeGen/RISCV/rv32e.ll +++ b/llvm/test/CodeGen/RISCV/rv32e.ll @@ -9,10 +9,10 @@ define i32 @exhausted(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g) { ; CHECK: # %bb.0: ; CHECK-NEXT: lw t0, 0(sp) ; CHECK-NEXT: add a0, a0, a1 -; CHECK-NEXT: add a2, a3, a2 ; CHECK-NEXT: add a0, a2, a0 -; CHECK-NEXT: add a4, a5, a4 +; CHECK-NEXT: add a0, a3, a0 ; CHECK-NEXT: add a0, a4, a0 +; CHECK-NEXT: add a0, a5, a0 ; CHECK-NEXT: add a0, t0, a0 ; CHECK-NEXT: ret %1 = add i32 %a, %b diff --git a/llvm/test/CodeGen/RISCV/rv32zba.ll b/llvm/test/CodeGen/RISCV/rv32zba.ll index 89273ef0e50b5..29dc593825137 100644 --- a/llvm/test/CodeGen/RISCV/rv32zba.ll +++ b/llvm/test/CodeGen/RISCV/rv32zba.ll @@ -673,8 +673,8 @@ define i32 @mul_neg3(i32 %a) { ; RV32I-LABEL: mul_neg3: ; RV32I: # %bb.0: ; RV32I-NEXT: slli a1, a0, 1 +; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: neg a0, a0 -; RV32I-NEXT: sub a0, a0, a1 ; RV32I-NEXT: ret ; ; RV32ZBA-LABEL: mul_neg3: @@ -700,8 +700,8 @@ define i32 @mul_neg5(i32 %a) { ; RV32I-LABEL: mul_neg5: ; RV32I: # %bb.0: ; RV32I-NEXT: slli a1, a0, 2 +; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: neg a0, a0 -; RV32I-NEXT: sub a0, a0, a1 ; RV32I-NEXT: ret ; ; RV32ZBA-LABEL: mul_neg5: diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll index e24b1b41645cd..8ef8d96bdc7d1 100644 --- a/llvm/test/CodeGen/RISCV/rv32zbb.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll @@ -1253,8 +1253,8 @@ define i64 @abs_i64(i64 %x) { ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: snez a2, a0 ; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: add a1, a1, a2 ; CHECK-NEXT: neg a1, a1 -; CHECK-NEXT: sub a1, a1, a2 ; CHECK-NEXT: .LBB37_2: ; CHECK-NEXT: ret %abs = tail call i64 @llvm.abs.i64(i64 %x, i1 true) diff --git a/llvm/test/CodeGen/RISCV/rv64e.ll b/llvm/test/CodeGen/RISCV/rv64e.ll index 093d503750abc..22ca713888895 100644 --- a/llvm/test/CodeGen/RISCV/rv64e.ll +++ b/llvm/test/CodeGen/RISCV/rv64e.ll @@ -9,10 +9,10 @@ define i64 @exhausted(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g) { ; CHECK: # %bb.0: ; CHECK-NEXT: ld t0, 0(sp) ; 
CHECK-NEXT: add a0, a0, a1 -; CHECK-NEXT: add a2, a3, a2 ; CHECK-NEXT: add a0, a2, a0 -; CHECK-NEXT: add a4, a5, a4 +; CHECK-NEXT: add a0, a3, a0 ; CHECK-NEXT: add a0, a4, a0 +; CHECK-NEXT: add a0, a5, a0 ; CHECK-NEXT: add a0, t0, a0 ; CHECK-NEXT: ret %1 = add i64 %a, %b diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll index a381ee67354b3..d5d987766f0de 100644 --- a/llvm/test/CodeGen/RISCV/rv64zba.ll +++ b/llvm/test/CodeGen/RISCV/rv64zba.ll @@ -2407,8 +2407,8 @@ define i8 @array_index_sh1_sh0(ptr %p, i64 %idx1, i64 %idx2) { ; RV64I-LABEL: array_index_sh1_sh0: ; RV64I: # %bb.0: ; RV64I-NEXT: slli a1, a1, 1 -; RV64I-NEXT: add a0, a0, a2 ; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: add a0, a0, a2 ; RV64I-NEXT: lbu a0, 0(a0) ; RV64I-NEXT: ret ; @@ -2490,8 +2490,8 @@ define i8 @array_index_sh2_sh0(ptr %p, i64 %idx1, i64 %idx2) { ; RV64I-LABEL: array_index_sh2_sh0: ; RV64I: # %bb.0: ; RV64I-NEXT: slli a1, a1, 2 -; RV64I-NEXT: add a0, a0, a2 ; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: add a0, a0, a2 ; RV64I-NEXT: lbu a0, 0(a0) ; RV64I-NEXT: ret ; @@ -2573,8 +2573,8 @@ define i8 @array_index_sh3_sh0(ptr %p, i64 %idx1, i64 %idx2) { ; RV64I-LABEL: array_index_sh3_sh0: ; RV64I: # %bb.0: ; RV64I-NEXT: slli a1, a1, 3 -; RV64I-NEXT: add a0, a0, a2 ; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: add a0, a0, a2 ; RV64I-NEXT: lbu a0, 0(a0) ; RV64I-NEXT: ret ; @@ -2659,17 +2659,18 @@ define i64 @array_index_lshr_sh3_sh3(ptr %p, i64 %idx1, i64 %idx2) { ; RV64I: # %bb.0: ; RV64I-NEXT: srli a1, a1, 58 ; RV64I-NEXT: slli a1, a1, 6 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: slli a2, a2, 3 ; RV64I-NEXT: add a0, a0, a2 -; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ld a0, 0(a0) ; RV64I-NEXT: ret ; ; RV64ZBA-LABEL: array_index_lshr_sh3_sh3: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: srli a1, a1, 58 -; RV64ZBA-NEXT: sh3add a1, a1, a2 -; RV64ZBA-NEXT: sh3add a0, a1, a0 +; RV64ZBA-NEXT: slli a1, a1, 6 +; RV64ZBA-NEXT: add a0, a0, a1 +; RV64ZBA-NEXT: sh3add a0, a2, a0 ; RV64ZBA-NEXT: ld a0, 0(a0) ; RV64ZBA-NEXT: ret %shr = lshr i64 %idx1, 58 @@ -2682,8 +2683,8 @@ define i8 @array_index_sh4_sh0(ptr %p, i64 %idx1, i64 %idx2) { ; CHECK-LABEL: array_index_sh4_sh0: ; CHECK: # %bb.0: ; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: add a0, a0, a2 ; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: add a0, a0, a2 ; CHECK-NEXT: lbu a0, 0(a0) ; CHECK-NEXT: ret %a = getelementptr inbounds [16 x i8], ptr %p, i64 %idx1, i64 %idx2 @@ -2762,15 +2763,16 @@ define ptr @test_gep_gep_dont_crash(ptr %p, i64 %a1, i64 %a2) { ; RV64I: # %bb.0: ; RV64I-NEXT: srliw a2, a2, 6 ; RV64I-NEXT: slli a2, a2, 3 +; RV64I-NEXT: add a0, a0, a2 ; RV64I-NEXT: slli a1, a1, 3 ; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: add a0, a0, a2 ; RV64I-NEXT: ret ; ; RV64ZBA-LABEL: test_gep_gep_dont_crash: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: srliw a2, a2, 6 -; RV64ZBA-NEXT: add a1, a2, a1 +; RV64ZBA-NEXT: slli a2, a2, 3 +; RV64ZBA-NEXT: add a0, a0, a2 ; RV64ZBA-NEXT: sh3add a0, a1, a0 ; RV64ZBA-NEXT: ret %lshr = lshr i64 %a2, 6 @@ -2825,8 +2827,8 @@ define i64 @mul_neg3(i64 %a) { ; RV64I-LABEL: mul_neg3: ; RV64I: # %bb.0: ; RV64I-NEXT: slli a1, a0, 1 +; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: neg a0, a0 -; RV64I-NEXT: sub a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64ZBA-LABEL: mul_neg3: @@ -2852,8 +2854,8 @@ define i64 @mul_neg5(i64 %a) { ; RV64I-LABEL: mul_neg5: ; RV64I: # %bb.0: ; RV64I-NEXT: slli a1, a0, 2 +; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: neg a0, a0 -; RV64I-NEXT: sub a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64ZBA-LABEL: mul_neg5: diff --git 
a/llvm/test/CodeGen/RISCV/rvv/compressstore.ll b/llvm/test/CodeGen/RISCV/rvv/compressstore.ll index 52811133c53f3..84fff0a3cf372 100644 --- a/llvm/test/CodeGen/RISCV/rvv/compressstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/compressstore.ll @@ -234,7 +234,7 @@ define void @test_compresstore_v256i8(ptr %p, <256 x i1> %mask, <256 x i8> %data ; RV64-NEXT: vcpop.m a2, v8 ; RV64-NEXT: cpop a3, a3 ; RV64-NEXT: cpop a1, a1 -; RV64-NEXT: add a0, a0, a3 +; RV64-NEXT: add a1, a3, a1 ; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; RV64-NEXT: vse8.v v16, (a0) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll index bfcc7017178e3..7de6fc21a962f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll @@ -137,9 +137,9 @@ define <3 x i15> @fp2si_v3f32_v3i15(<3 x float> %x) { ; ZVFH32-NEXT: vmv.x.s a4, v8 ; ZVFH32-NEXT: and a3, a4, a3 ; ZVFH32-NEXT: slli a3, a3, 15 +; ZVFH32-NEXT: or a2, a2, a3 ; ZVFH32-NEXT: slli a1, a1, 30 ; ZVFH32-NEXT: or a1, a2, a1 -; ZVFH32-NEXT: or a1, a1, a3 ; ZVFH32-NEXT: sw a1, 0(a0) ; ZVFH32-NEXT: ret ; @@ -155,10 +155,10 @@ define <3 x i15> @fp2si_v3f32_v3i15(<3 x float> %x) { ; ZVFH64-NEXT: vmv.x.s a3, v8 ; ZVFH64-NEXT: and a2, a3, a2 ; ZVFH64-NEXT: slli a2, a2, 15 +; ZVFH64-NEXT: or a1, a1, a2 ; ZVFH64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFH64-NEXT: vmv.x.s a3, v8 -; ZVFH64-NEXT: slli a3, a3, 30 -; ZVFH64-NEXT: or a1, a1, a3 +; ZVFH64-NEXT: vmv.x.s a2, v8 +; ZVFH64-NEXT: slli a2, a2, 30 ; ZVFH64-NEXT: or a1, a1, a2 ; ZVFH64-NEXT: sw a1, 0(a0) ; ZVFH64-NEXT: slli a1, a1, 19 @@ -183,9 +183,9 @@ define <3 x i15> @fp2si_v3f32_v3i15(<3 x float> %x) { ; ZVFHMIN32-NEXT: vmv.x.s a4, v8 ; ZVFHMIN32-NEXT: and a3, a4, a3 ; ZVFHMIN32-NEXT: slli a3, a3, 15 +; ZVFHMIN32-NEXT: or a2, a2, a3 ; ZVFHMIN32-NEXT: slli a1, a1, 30 ; ZVFHMIN32-NEXT: or a1, a2, a1 -; ZVFHMIN32-NEXT: or a1, a1, a3 ; ZVFHMIN32-NEXT: sw a1, 0(a0) ; ZVFHMIN32-NEXT: ret ; @@ -201,10 +201,10 @@ define <3 x i15> @fp2si_v3f32_v3i15(<3 x float> %x) { ; ZVFHMIN64-NEXT: vmv.x.s a3, v8 ; ZVFHMIN64-NEXT: and a2, a3, a2 ; ZVFHMIN64-NEXT: slli a2, a2, 15 +; ZVFHMIN64-NEXT: or a1, a1, a2 ; ZVFHMIN64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMIN64-NEXT: vmv.x.s a3, v8 -; ZVFHMIN64-NEXT: slli a3, a3, 30 -; ZVFHMIN64-NEXT: or a1, a1, a3 +; ZVFHMIN64-NEXT: vmv.x.s a2, v8 +; ZVFHMIN64-NEXT: slli a2, a2, 30 ; ZVFHMIN64-NEXT: or a1, a1, a2 ; ZVFHMIN64-NEXT: sw a1, 0(a0) ; ZVFHMIN64-NEXT: slli a1, a1, 19 @@ -234,9 +234,9 @@ define <3 x i15> @fp2ui_v3f32_v3i15(<3 x float> %x) { ; ZVFH32-NEXT: vmv.x.s a4, v8 ; ZVFH32-NEXT: and a3, a4, a3 ; ZVFH32-NEXT: slli a3, a3, 15 +; ZVFH32-NEXT: or a2, a2, a3 ; ZVFH32-NEXT: slli a1, a1, 30 ; ZVFH32-NEXT: or a1, a2, a1 -; ZVFH32-NEXT: or a1, a1, a3 ; ZVFH32-NEXT: sw a1, 0(a0) ; ZVFH32-NEXT: ret ; @@ -252,10 +252,10 @@ define <3 x i15> @fp2ui_v3f32_v3i15(<3 x float> %x) { ; ZVFH64-NEXT: vmv.x.s a3, v8 ; ZVFH64-NEXT: and a2, a3, a2 ; ZVFH64-NEXT: slli a2, a2, 15 +; ZVFH64-NEXT: or a1, a1, a2 ; ZVFH64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFH64-NEXT: vmv.x.s a3, v8 -; ZVFH64-NEXT: slli a3, a3, 30 -; ZVFH64-NEXT: or a1, a1, a3 +; ZVFH64-NEXT: vmv.x.s a2, v8 +; ZVFH64-NEXT: slli a2, a2, 30 ; ZVFH64-NEXT: or a1, a1, a2 ; ZVFH64-NEXT: sw a1, 0(a0) ; ZVFH64-NEXT: slli a1, a1, 19 @@ -280,9 +280,9 @@ define <3 x i15> @fp2ui_v3f32_v3i15(<3 x float> %x) { ; ZVFHMIN32-NEXT: vmv.x.s a4, v8 ; ZVFHMIN32-NEXT: and a3, a4, a3 ; ZVFHMIN32-NEXT: slli a3, a3, 15 +; ZVFHMIN32-NEXT: or 
a2, a2, a3 ; ZVFHMIN32-NEXT: slli a1, a1, 30 ; ZVFHMIN32-NEXT: or a1, a2, a1 -; ZVFHMIN32-NEXT: or a1, a1, a3 ; ZVFHMIN32-NEXT: sw a1, 0(a0) ; ZVFHMIN32-NEXT: ret ; @@ -298,10 +298,10 @@ define <3 x i15> @fp2ui_v3f32_v3i15(<3 x float> %x) { ; ZVFHMIN64-NEXT: vmv.x.s a3, v8 ; ZVFHMIN64-NEXT: and a2, a3, a2 ; ZVFHMIN64-NEXT: slli a2, a2, 15 +; ZVFHMIN64-NEXT: or a1, a1, a2 ; ZVFHMIN64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMIN64-NEXT: vmv.x.s a3, v8 -; ZVFHMIN64-NEXT: slli a3, a3, 30 -; ZVFHMIN64-NEXT: or a1, a1, a3 +; ZVFHMIN64-NEXT: vmv.x.s a2, v8 +; ZVFHMIN64-NEXT: slli a2, a2, 30 ; ZVFHMIN64-NEXT: or a1, a1, a2 ; ZVFHMIN64-NEXT: sw a1, 0(a0) ; ZVFHMIN64-NEXT: slli a1, a1, 19 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-explodevector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-explodevector.ll index e0c676788dccc..f6f3097fb1e6a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-explodevector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-explodevector.ll @@ -28,8 +28,8 @@ define i8 @explode_4xi8(<4 x i8> %v) { ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vredxor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a2, v8 -; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, a2, a0 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: ret %e0 = extractelement <4 x i8> %v, i32 0 %e1 = extractelement <4 x i8> %v, i32 1 @@ -62,11 +62,11 @@ define i8 @explode_8xi8(<8 x i8> %v) { ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vredxor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a6, v8 -; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, a6, a0 -; CHECK-NEXT: add a2, a2, a3 -; CHECK-NEXT: add a2, a2, a4 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, a0, a2 +; CHECK-NEXT: add a0, a0, a3 +; CHECK-NEXT: add a0, a0, a4 ; CHECK-NEXT: add a0, a0, a5 ; CHECK-NEXT: ret %e0 = extractelement <8 x i8> %v, i32 0 @@ -123,20 +123,20 @@ define i8 @explode_16xi8(<16 x i8> %v) { ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vredxor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s t6, v8 -; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, t6, a0 -; CHECK-NEXT: add a2, a2, a3 -; CHECK-NEXT: add a2, a2, a4 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, a0, a2 -; CHECK-NEXT: add a5, a5, a6 -; CHECK-NEXT: add a5, a5, a7 -; CHECK-NEXT: add a5, a5, t0 +; CHECK-NEXT: add a0, a0, a3 +; CHECK-NEXT: add a0, a0, a4 ; CHECK-NEXT: add a0, a0, a5 -; CHECK-NEXT: add t1, t1, t2 -; CHECK-NEXT: add t1, t1, t3 -; CHECK-NEXT: add t1, t1, t4 -; CHECK-NEXT: add t1, t1, t5 +; CHECK-NEXT: add a0, a0, a6 +; CHECK-NEXT: add a0, a0, a7 +; CHECK-NEXT: add a0, a0, t0 ; CHECK-NEXT: add a0, a0, t1 +; CHECK-NEXT: add a0, a0, t2 +; CHECK-NEXT: add a0, a0, t3 +; CHECK-NEXT: add a0, a0, t4 +; CHECK-NEXT: add a0, a0, t5 ; CHECK-NEXT: ret %e0 = extractelement <16 x i8> %v, i32 0 %e1 = extractelement <16 x i8> %v, i32 1 @@ -198,8 +198,8 @@ define i16 @explode_4xi16(<4 x i16> %v) { ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vredxor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a2, v8 -; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, a2, a0 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: ret %e0 = extractelement <4 x i16> %v, i32 0 %e1 = extractelement <4 x i16> %v, i32 1 @@ -232,11 +232,11 @@ define i16 @explode_8xi16(<8 x i16> %v) { ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vredxor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a6, v8 -; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, a6, a0 -; CHECK-NEXT: add a2, a2, a3 -; CHECK-NEXT: add a2, a2, a4 +; CHECK-NEXT: add a0, a0, a1 ; 
CHECK-NEXT: add a0, a0, a2 +; CHECK-NEXT: add a0, a0, a3 +; CHECK-NEXT: add a0, a0, a4 ; CHECK-NEXT: add a0, a0, a5 ; CHECK-NEXT: ret %e0 = extractelement <8 x i16> %v, i32 0 @@ -294,20 +294,20 @@ define i16 @explode_16xi16(<16 x i16> %v) { ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vredxor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s t6, v8 -; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, t6, a0 -; CHECK-NEXT: add a2, a2, a3 -; CHECK-NEXT: add a2, a2, a4 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, a0, a2 -; CHECK-NEXT: add a5, a5, a6 -; CHECK-NEXT: add a5, a5, a7 -; CHECK-NEXT: add a5, a5, t0 +; CHECK-NEXT: add a0, a0, a3 +; CHECK-NEXT: add a0, a0, a4 ; CHECK-NEXT: add a0, a0, a5 -; CHECK-NEXT: add t1, t1, t2 -; CHECK-NEXT: add t1, t1, t3 -; CHECK-NEXT: add t1, t1, t4 -; CHECK-NEXT: add t1, t1, t5 +; CHECK-NEXT: add a0, a0, a6 +; CHECK-NEXT: add a0, a0, a7 +; CHECK-NEXT: add a0, a0, t0 ; CHECK-NEXT: add a0, a0, t1 +; CHECK-NEXT: add a0, a0, t2 +; CHECK-NEXT: add a0, a0, t3 +; CHECK-NEXT: add a0, a0, t4 +; CHECK-NEXT: add a0, a0, t5 ; CHECK-NEXT: ret %e0 = extractelement <16 x i16> %v, i32 0 %e1 = extractelement <16 x i16> %v, i32 1 @@ -369,8 +369,8 @@ define i32 @explode_4xi32(<4 x i32> %v) { ; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: vredxor.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s a2, v8 -; RV32-NEXT: add a0, a0, a1 ; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: add a0, a0, a1 ; RV32-NEXT: ret ; ; RV64-LABEL: explode_4xi32: @@ -384,8 +384,8 @@ define i32 @explode_4xi32(<4 x i32> %v) { ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV64-NEXT: vredxor.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a2, v8 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: addw a0, a2, a0 +; RV64-NEXT: add a0, a2, a0 +; RV64-NEXT: addw a0, a0, a1 ; RV64-NEXT: ret %e0 = extractelement <4 x i32> %v, i32 0 %e1 = extractelement <4 x i32> %v, i32 1 @@ -419,11 +419,11 @@ define i32 @explode_8xi32(<8 x i32> %v) { ; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: vredxor.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s a6, v8 -; RV32-NEXT: add a0, a0, a1 ; RV32-NEXT: add a0, a6, a0 -; RV32-NEXT: add a2, a2, a3 -; RV32-NEXT: add a2, a2, a4 +; RV32-NEXT: add a0, a0, a1 ; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: add a0, a0, a3 +; RV32-NEXT: add a0, a0, a4 ; RV32-NEXT: add a0, a0, a5 ; RV32-NEXT: ret ; @@ -447,11 +447,11 @@ define i32 @explode_8xi32(<8 x i32> %v) { ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV64-NEXT: vredxor.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a6, v8 -; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: add a0, a6, a0 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, a2, a4 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: add a0, a0, a2 +; RV64-NEXT: add a0, a0, a3 +; RV64-NEXT: add a0, a0, a4 ; RV64-NEXT: addw a0, a0, a5 ; RV64-NEXT: ret %e0 = extractelement <8 x i32> %v, i32 0 @@ -513,20 +513,20 @@ define i32 @explode_16xi32(<16 x i32> %v) { ; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: vredxor.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s t6, v8 -; RV32-NEXT: add a0, a0, a1 ; RV32-NEXT: add a0, t6, a0 -; RV32-NEXT: add a2, a2, a3 -; RV32-NEXT: add a2, a2, a4 +; RV32-NEXT: add a0, a0, a1 ; RV32-NEXT: add a0, a0, a2 -; RV32-NEXT: add a5, a5, a6 +; RV32-NEXT: add a0, a0, a3 +; RV32-NEXT: add a0, a0, a4 ; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a7, a7, t0 -; RV32-NEXT: add a7, a7, t1 +; RV32-NEXT: add a0, a0, a6 ; RV32-NEXT: add a0, a0, a7 -; RV32-NEXT: add t2, t2, t3 -; RV32-NEXT: add t2, t2, t4 -; RV32-NEXT: add t2, t2, t5 +; RV32-NEXT: add a0, a0, t0 +; RV32-NEXT: add a0, a0, t1 ; RV32-NEXT: add 
a0, a0, t2 +; RV32-NEXT: add a0, a0, t3 +; RV32-NEXT: add a0, a0, t4 +; RV32-NEXT: add a0, a0, t5 ; RV32-NEXT: addi sp, s0, -128 ; RV32-NEXT: lw ra, 124(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 120(sp) # 4-byte Folded Reload @@ -573,20 +573,20 @@ define i32 @explode_16xi32(<16 x i32> %v) { ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV64-NEXT: vredxor.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s t6, v8 -; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: add a0, t6, a0 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, a2, a4 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: add a0, a0, a2 -; RV64-NEXT: add a5, a5, a6 +; RV64-NEXT: add a0, a0, a3 +; RV64-NEXT: add a0, a0, a4 ; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a7, a7, t0 -; RV64-NEXT: add a7, a7, t1 +; RV64-NEXT: add a0, a0, a6 ; RV64-NEXT: add a0, a0, a7 -; RV64-NEXT: add t2, t2, t3 -; RV64-NEXT: add t2, t2, t4 -; RV64-NEXT: add t2, t2, t5 -; RV64-NEXT: addw a0, a0, t2 +; RV64-NEXT: add a0, a0, t0 +; RV64-NEXT: add a0, a0, t1 +; RV64-NEXT: add a0, a0, t2 +; RV64-NEXT: add a0, a0, t3 +; RV64-NEXT: add a0, a0, t4 +; RV64-NEXT: addw a0, a0, t5 ; RV64-NEXT: addi sp, s0, -128 ; RV64-NEXT: ld ra, 120(sp) # 8-byte Folded Reload ; RV64-NEXT: ld s0, 112(sp) # 8-byte Folded Reload @@ -693,8 +693,8 @@ define i64 @explode_4xi64(<4 x i64> %v) { ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vredxor.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a2, v8 -; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: add a0, a2, a0 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: ret %e0 = extractelement <4 x i64> %v, i32 0 %e1 = extractelement <4 x i64> %v, i32 1 @@ -750,20 +750,20 @@ define i64 @explode_8xi64(<8 x i64> %v) { ; RV32-NEXT: add a0, a0, a3 ; RV32-NEXT: add a4, a2, a4 ; RV32-NEXT: sltu a1, a4, a2 -; RV32-NEXT: add a1, a1, a5 ; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, a0, a5 ; RV32-NEXT: add a6, a4, a6 ; RV32-NEXT: sltu a1, a6, a4 -; RV32-NEXT: add a1, a1, a7 ; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, a0, a7 ; RV32-NEXT: add t0, a6, t0 ; RV32-NEXT: sltu a1, t0, a6 -; RV32-NEXT: add a1, a1, t1 ; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, a0, t1 ; RV32-NEXT: add t2, t0, t2 ; RV32-NEXT: sltu a1, t2, t0 -; RV32-NEXT: add a1, a1, t3 -; RV32-NEXT: add a1, a0, a1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a1, a0, t3 ; RV32-NEXT: add a0, t2, t4 ; RV32-NEXT: sltu a2, a0, t2 ; RV32-NEXT: add a1, a1, a2 @@ -796,11 +796,11 @@ define i64 @explode_8xi64(<8 x i64> %v) { ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vredxor.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a6, v8 -; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: add a0, a6, a0 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: add a0, a0, a2 -; RV64-NEXT: add a3, a3, a4 ; RV64-NEXT: add a0, a0, a3 +; RV64-NEXT: add a0, a0, a4 ; RV64-NEXT: add a0, a0, a5 ; RV64-NEXT: addi sp, s0, -128 ; RV64-NEXT: ld ra, 120(sp) # 8-byte Folded Reload @@ -902,8 +902,8 @@ define i64 @explode_16xi64(<16 x i64> %v) { ; RV32-NEXT: vmv.x.s s7, v16 ; RV32-NEXT: vslidedown.vi v16, v8, 13 ; RV32-NEXT: vsrl.vx v24, v16, a0 -; RV32-NEXT: vmv.x.s s9, v24 -; RV32-NEXT: vmv.x.s s8, v16 +; RV32-NEXT: vmv.x.s s8, v24 +; RV32-NEXT: vmv.x.s s9, v16 ; RV32-NEXT: vslidedown.vi v16, v8, 14 ; RV32-NEXT: vsrl.vx v24, v16, a0 ; RV32-NEXT: vmv.s.x v17, zero @@ -924,59 +924,59 @@ define i64 @explode_16xi64(<16 x i64> %v) { ; RV32-NEXT: add a0, a0, a3 ; RV32-NEXT: add a4, a2, a4 ; RV32-NEXT: sltu a1, a4, a2 -; RV32-NEXT: add a1, a1, a5 ; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, a0, a5 ; RV32-NEXT: add a6, a4, a6 ; RV32-NEXT: sltu a1, a6, a4 -; RV32-NEXT: 
add a1, a1, a7 ; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, a0, a7 ; RV32-NEXT: add t0, a6, t0 ; RV32-NEXT: sltu a1, t0, a6 -; RV32-NEXT: add a1, a1, t1 ; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, a0, t1 ; RV32-NEXT: add t2, t0, t2 ; RV32-NEXT: sltu a1, t2, t0 -; RV32-NEXT: add a1, a1, t3 ; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, a0, t3 ; RV32-NEXT: add t4, t2, t4 ; RV32-NEXT: sltu a1, t4, t2 -; RV32-NEXT: add a1, a1, t5 ; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, a0, t5 ; RV32-NEXT: add t6, t4, t6 ; RV32-NEXT: sltu a1, t6, t4 -; RV32-NEXT: add a1, a1, s0 ; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, a0, s0 ; RV32-NEXT: add s1, t6, s1 ; RV32-NEXT: sltu a1, s1, t6 -; RV32-NEXT: add a1, a1, s2 ; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, a0, s2 ; RV32-NEXT: add s3, s1, s3 ; RV32-NEXT: sltu a1, s3, s1 -; RV32-NEXT: add a1, a1, s4 ; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, a0, s4 ; RV32-NEXT: add s5, s3, s5 ; RV32-NEXT: sltu a1, s5, s3 -; RV32-NEXT: add a1, a1, s6 ; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, a0, s6 ; RV32-NEXT: add s7, s5, s7 ; RV32-NEXT: sltu a1, s7, s5 -; RV32-NEXT: add a1, a1, s9 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, a0, s8 +; RV32-NEXT: add s9, s7, s9 +; RV32-NEXT: sltu a1, s9, s7 ; RV32-NEXT: add a0, a0, a1 ; RV32-NEXT: vmv.x.s a1, v24 -; RV32-NEXT: add s8, s7, s8 -; RV32-NEXT: sltu a2, s8, s7 -; RV32-NEXT: add a1, a2, a1 -; RV32-NEXT: vmv.x.s a2, v16 ; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: vmv.x.s a1, v0 -; RV32-NEXT: add a2, s8, a2 -; RV32-NEXT: sltu a3, a2, s8 -; RV32-NEXT: add a1, a3, a1 -; RV32-NEXT: add a1, a0, a1 +; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: add a1, s9, a1 +; RV32-NEXT: sltu a2, a1, s9 +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: vmv.x.s a2, v0 +; RV32-NEXT: add a2, a0, a2 ; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: add a0, a2, a0 -; RV32-NEXT: sltu a2, a0, a2 -; RV32-NEXT: add a1, a1, a2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: sltu a1, a0, a1 +; RV32-NEXT: add a1, a2, a1 ; RV32-NEXT: lw s0, 44(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s1, 40(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s2, 36(sp) # 4-byte Folded Reload @@ -1027,20 +1027,20 @@ define i64 @explode_16xi64(<16 x i64> %v) { ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vredxor.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s t6, v8 -; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: add a0, t6, a0 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: add a0, a0, a2 -; RV64-NEXT: add a3, a3, a4 ; RV64-NEXT: add a0, a0, a3 -; RV64-NEXT: add a5, a5, a6 -; RV64-NEXT: add a5, a5, a7 +; RV64-NEXT: add a0, a0, a4 ; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add t0, t0, t1 -; RV64-NEXT: add t0, t0, t2 -; RV64-NEXT: add t0, t0, t3 +; RV64-NEXT: add a0, a0, a6 +; RV64-NEXT: add a0, a0, a7 ; RV64-NEXT: add a0, a0, t0 -; RV64-NEXT: add t4, t4, t5 +; RV64-NEXT: add a0, a0, t1 +; RV64-NEXT: add a0, a0, t2 +; RV64-NEXT: add a0, a0, t3 ; RV64-NEXT: add a0, a0, t4 +; RV64-NEXT: add a0, a0, t5 ; RV64-NEXT: addi sp, s0, -256 ; RV64-NEXT: ld ra, 248(sp) # 8-byte Folded Reload ; RV64-NEXT: ld s0, 240(sp) # 8-byte Folded Reload @@ -1113,20 +1113,20 @@ define i32 @explode_16xi32_exact_vlen(<16 x i32> %v) vscale_range(2, 2) { ; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: vredxor.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s t6, v8 -; RV32-NEXT: add a0, a0, a1 ; RV32-NEXT: add a0, t6, a0 -; RV32-NEXT: add a2, a2, a3 -; RV32-NEXT: add a2, a2, a4 +; RV32-NEXT: add a0, a0, a1 ; RV32-NEXT: add a0, a0, a2 -; RV32-NEXT: add a5, a5, a6 -; RV32-NEXT: add a5, a5, a7 -; RV32-NEXT: add 
a5, a5, t0 +; RV32-NEXT: add a0, a0, a3 +; RV32-NEXT: add a0, a0, a4 ; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add t1, t1, t2 -; RV32-NEXT: add t1, t1, t3 -; RV32-NEXT: add t1, t1, t4 -; RV32-NEXT: add t1, t1, t5 +; RV32-NEXT: add a0, a0, a6 +; RV32-NEXT: add a0, a0, a7 +; RV32-NEXT: add a0, a0, t0 ; RV32-NEXT: add a0, a0, t1 +; RV32-NEXT: add a0, a0, t2 +; RV32-NEXT: add a0, a0, t3 +; RV32-NEXT: add a0, a0, t4 +; RV32-NEXT: add a0, a0, t5 ; RV32-NEXT: ret ; ; RV64-LABEL: explode_16xi32_exact_vlen: @@ -1161,20 +1161,20 @@ define i32 @explode_16xi32_exact_vlen(<16 x i32> %v) vscale_range(2, 2) { ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV64-NEXT: vredxor.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s t6, v8 -; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: add a0, t6, a0 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, a2, a4 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: add a0, a0, a2 -; RV64-NEXT: add a5, a5, a6 -; RV64-NEXT: add a5, a5, a7 -; RV64-NEXT: add a5, a5, t0 +; RV64-NEXT: add a0, a0, a3 +; RV64-NEXT: add a0, a0, a4 ; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add t1, t1, t2 -; RV64-NEXT: add t1, t1, t3 -; RV64-NEXT: add t1, t1, t4 -; RV64-NEXT: add t1, t1, t5 -; RV64-NEXT: addw a0, a0, t1 +; RV64-NEXT: add a0, a0, a6 +; RV64-NEXT: add a0, a0, a7 +; RV64-NEXT: add a0, a0, t0 +; RV64-NEXT: add a0, a0, t1 +; RV64-NEXT: add a0, a0, t2 +; RV64-NEXT: add a0, a0, t3 +; RV64-NEXT: add a0, a0, t4 +; RV64-NEXT: addw a0, a0, t5 ; RV64-NEXT: ret %e0 = extractelement <16 x i32> %v, i32 0 %e1 = extractelement <16 x i32> %v, i32 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll index a8798474d669a..60298cce3d122 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll @@ -441,8 +441,8 @@ define i32 @reduce_sum_4xi32_reduce_order(<4 x i32> %v) { ; RV32-NEXT: vslidedown.vi v8, v8, 3 ; RV32-NEXT: vmv.x.s a3, v8 ; RV32-NEXT: add a1, a1, a2 -; RV32-NEXT: add a0, a0, a3 ; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, a0, a3 ; RV32-NEXT: ret ; ; RV64-LABEL: reduce_sum_4xi32_reduce_order: @@ -456,8 +456,8 @@ define i32 @reduce_sum_4xi32_reduce_order(<4 x i32> %v) { ; RV64-NEXT: vslidedown.vi v8, v8, 3 ; RV64-NEXT: vmv.x.s a3, v8 ; RV64-NEXT: add a1, a1, a2 -; RV64-NEXT: add a0, a0, a3 -; RV64-NEXT: addw a0, a0, a1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: addw a0, a0, a3 ; RV64-NEXT: ret %e0 = extractelement <4 x i32> %v, i32 0 %e1 = extractelement <4 x i32> %v, i32 1 @@ -891,8 +891,8 @@ define float @reduce_fadd_4xi32_non_associative2(ptr %p) { ; CHECK-NEXT: vslidedown.vi v8, v8, 3 ; CHECK-NEXT: vfmv.f.s fa2, v8 ; CHECK-NEXT: fadd.s fa5, fa5, fa4 -; CHECK-NEXT: fadd.s fa4, fa3, fa2 -; CHECK-NEXT: fadd.s fa0, fa5, fa4 +; CHECK-NEXT: fadd.s fa5, fa5, fa3 +; CHECK-NEXT: fadd.s fa0, fa5, fa2 ; CHECK-NEXT: ret %v = load <4 x float>, ptr %p, align 256 %e0 = extractelement <4 x float> %v, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll index 169d99abb13c2..2caffbc6c51fc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll @@ -93,12 +93,12 @@ define void @store_v6i1(ptr %p, <6 x i1> %v) { ; CHECK-NEXT: andi a3, a2, 2 ; CHECK-NEXT: or a1, a1, a3 ; CHECK-NEXT: andi a3, a2, 4 -; CHECK-NEXT: andi a4, a2, 8 -; CHECK-NEXT: or a3, a3, a4 +; CHECK-NEXT: or a1, a1, a3 +; CHECK-NEXT: andi a3, a2, 8 ; 
CHECK-NEXT: or a1, a1, a3 ; CHECK-NEXT: andi a3, a2, 16 +; CHECK-NEXT: or a1, a1, a3 ; CHECK-NEXT: andi a2, a2, -32 -; CHECK-NEXT: or a2, a3, a2 ; CHECK-NEXT: or a1, a1, a2 ; CHECK-NEXT: andi a1, a1, 63 ; CHECK-NEXT: sb a1, 0(a0) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll index b49e323478e8c..b49aed1d02656 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll @@ -540,8 +540,8 @@ define void @reverse_strided_runtime_4xv2f32(ptr %x, ptr %z, i64 %s) { ; CHECK-LABEL: reverse_strided_runtime_4xv2f32: ; CHECK: # %bb.0: ; CHECK-NEXT: add a0, a0, a2 -; CHECK-NEXT: add a3, a2, a2 -; CHECK-NEXT: add a0, a0, a3 +; CHECK-NEXT: add a0, a0, a2 +; CHECK-NEXT: add a0, a0, a2 ; CHECK-NEXT: neg a2, a2 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vlse64.v v8, (a0), a2 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll index 9385fa69b2f04..b703b663a12ab 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll @@ -799,35 +799,35 @@ define void @strided_load_startval_add_with_splat(ptr noalias nocapture %arg, pt ; CHECK-NEXT: addi a5, a5, 1 ; CHECK-NEXT: andi a6, a5, -32 ; CHECK-NEXT: add a4, a6, a2 -; CHECK-NEXT: slli t0, a2, 2 -; CHECK-NEXT: add a7, a0, a2 -; CHECK-NEXT: add a2, a1, a2 -; CHECK-NEXT: add a2, a2, t0 +; CHECK-NEXT: slli a7, a2, 2 +; CHECK-NEXT: add a7, a7, a2 +; CHECK-NEXT: add a2, a0, a2 +; CHECK-NEXT: add a7, a1, a7 ; CHECK-NEXT: add t0, a4, a0 ; CHECK-NEXT: li t2, 32 ; CHECK-NEXT: li t1, 5 ; CHECK-NEXT: vsetvli zero, t2, e8, m1, ta, ma ; CHECK-NEXT: .LBB14_3: # %bb15 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vlse8.v v8, (a2), t1 -; CHECK-NEXT: vle8.v v9, (a7) +; CHECK-NEXT: vlse8.v v8, (a7), t1 +; CHECK-NEXT: vle8.v v9, (a2) ; CHECK-NEXT: vadd.vv v8, v9, v8 -; CHECK-NEXT: vse8.v v8, (a7) -; CHECK-NEXT: addi a7, a7, 32 -; CHECK-NEXT: addi a2, a2, 160 -; CHECK-NEXT: bne a7, t0, .LBB14_3 +; CHECK-NEXT: vse8.v v8, (a2) +; CHECK-NEXT: addi a2, a2, 32 +; CHECK-NEXT: addi a7, a7, 160 +; CHECK-NEXT: bne a2, t0, .LBB14_3 ; CHECK-NEXT: # %bb.4: # %bb30 ; CHECK-NEXT: beq a5, a6, .LBB14_7 ; CHECK-NEXT: .LBB14_5: # %bb32 ; CHECK-NEXT: add a2, a0, a4 ; CHECK-NEXT: slli a5, a4, 2 -; CHECK-NEXT: add a1, a1, a4 +; CHECK-NEXT: add a5, a5, a4 ; CHECK-NEXT: add a1, a1, a5 ; CHECK-NEXT: subw a3, a3, a4 ; CHECK-NEXT: slli a3, a3, 32 ; CHECK-NEXT: srli a3, a3, 32 -; CHECK-NEXT: add a0, a4, a0 -; CHECK-NEXT: add a0, a0, a3 +; CHECK-NEXT: add a3, a4, a3 +; CHECK-NEXT: add a0, a3, a0 ; CHECK-NEXT: addi a0, a0, 1 ; CHECK-NEXT: .LBB14_6: # %bb35 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-reassociations.ll b/llvm/test/CodeGen/RISCV/rvv/vector-reassociations.ll index 79bd60d1702f3..37f92508a14fd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-reassociations.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-reassociations.ll @@ -31,7 +31,7 @@ define @simple_vadd_vv( %0, ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vadd.vv v9, v8, v9 -; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vadd.vv v9, v8, v9 ; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: ret entry: @@ -61,7 +61,7 @@ define 
@simple_vadd_vsub_vv( %0, @simple_vmul_vv( %0, ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vmul.vv v9, v8, v9 -; CHECK-NEXT: vmul.vv v8, v8, v8 +; CHECK-NEXT: vmul.vv v9, v8, v9 ; CHECK-NEXT: vmul.vv v8, v8, v9 ; CHECK-NEXT: ret entry: @@ -124,8 +124,8 @@ define @vadd_vv_passthru( %0, @llvm.riscv.vadd.nxv1i8.nxv1i8( @@ -187,8 +187,8 @@ define @vadd_vv_mask( %0, % ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vadd.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv1r.v v9, v8 -; CHECK-NEXT: vadd.vv v9, v8, v8, v0.t -; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t +; CHECK-NEXT: vadd.vv v9, v8, v10, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vadd.mask.nxv1i8.nxv1i8( diff --git a/llvm/test/CodeGen/RISCV/split-udiv-by-constant.ll b/llvm/test/CodeGen/RISCV/split-udiv-by-constant.ll index 5fa802b7f27ca..1b78653e03f78 100644 --- a/llvm/test/CodeGen/RISCV/split-udiv-by-constant.ll +++ b/llvm/test/CodeGen/RISCV/split-udiv-by-constant.ll @@ -424,8 +424,8 @@ define iXLen2 @test_udiv_65535(iXLen2 %x) nounwind { ; RV32-NEXT: add a0, a0, a1 ; RV32-NEXT: sub a1, a4, a0 ; RV32-NEXT: slli a0, a3, 16 -; RV32-NEXT: neg a2, a3 -; RV32-NEXT: sub a0, a2, a0 +; RV32-NEXT: add a0, a0, a3 +; RV32-NEXT: neg a0, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: test_udiv_65535: diff --git a/llvm/test/CodeGen/RISCV/srem-lkk.ll b/llvm/test/CodeGen/RISCV/srem-lkk.ll index 7c291bbceedc6..ea71e6a5d5a5a 100644 --- a/llvm/test/CodeGen/RISCV/srem-lkk.ll +++ b/llvm/test/CodeGen/RISCV/srem-lkk.ll @@ -240,8 +240,8 @@ define i32 @combine_srem_sdiv(i32 %x) nounwind { ; RV32IM-NEXT: add a1, a1, a2 ; RV32IM-NEXT: li a2, 95 ; RV32IM-NEXT: mul a2, a1, a2 -; RV32IM-NEXT: add a0, a0, a1 ; RV32IM-NEXT: sub a0, a0, a2 +; RV32IM-NEXT: add a0, a0, a1 ; RV32IM-NEXT: ret ; ; RV64I-LABEL: combine_srem_sdiv: @@ -278,8 +278,8 @@ define i32 @combine_srem_sdiv(i32 %x) nounwind { ; RV64IM-NEXT: add a1, a1, a2 ; RV64IM-NEXT: li a2, 95 ; RV64IM-NEXT: mul a2, a1, a2 -; RV64IM-NEXT: add a0, a0, a1 ; RV64IM-NEXT: subw a0, a0, a2 +; RV64IM-NEXT: addw a0, a0, a1 ; RV64IM-NEXT: ret %1 = srem i32 %x, 95 %2 = sdiv i32 %x, 95 diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll index 5d00e90366c3b..46145ce1a9a3c 100644 --- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll @@ -363,9 +363,9 @@ define void @test_srem_vec(ptr %X) nounwind { ; RV32-NEXT: srli a2, a1, 31 ; RV32-NEXT: andi a1, a1, 1 ; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: or a1, a2, a1 ; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: or a0, a2, a0 -; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: or a0, a1, a0 ; RV32-NEXT: sw a0, 8(s0) ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -515,9 +515,9 @@ define void @test_srem_vec(ptr %X) nounwind { ; RV32M-NEXT: srli a2, a1, 31 ; RV32M-NEXT: andi a1, a1, 1 ; RV32M-NEXT: slli a1, a1, 1 +; RV32M-NEXT: or a1, a2, a1 ; RV32M-NEXT: slli a0, a0, 2 -; RV32M-NEXT: or a0, a2, a0 -; RV32M-NEXT: or a0, a0, a1 +; RV32M-NEXT: or a0, a1, a0 ; RV32M-NEXT: sw a0, 8(s0) ; RV32M-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32M-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -540,29 +540,29 @@ define void @test_srem_vec(ptr %X) nounwind { ; RV64M-NEXT: or a4, a5, a4 ; RV64M-NEXT: srai a4, a4, 31 ; RV64M-NEXT: slli a3, a3, 32 +; RV64M-NEXT: lui a5, %hi(.LCPI3_0) +; RV64M-NEXT: ld a5, %lo(.LCPI3_0)(a5) ; RV64M-NEXT: or a2, a2, a3 ; 
RV64M-NEXT: slli a2, a2, 29 -; RV64M-NEXT: lui a3, %hi(.LCPI3_0) -; RV64M-NEXT: ld a3, %lo(.LCPI3_0)(a3) ; RV64M-NEXT: srai a2, a2, 31 -; RV64M-NEXT: slli a1, a1, 31 -; RV64M-NEXT: srai a1, a1, 31 -; RV64M-NEXT: mulh a3, a2, a3 +; RV64M-NEXT: mulh a3, a2, a5 ; RV64M-NEXT: srli a5, a3, 63 ; RV64M-NEXT: srai a3, a3, 1 ; RV64M-NEXT: add a3, a3, a5 +; RV64M-NEXT: slli a5, a3, 2 +; RV64M-NEXT: add a3, a5, a3 ; RV64M-NEXT: lui a5, %hi(.LCPI3_1) ; RV64M-NEXT: ld a5, %lo(.LCPI3_1)(a5) -; RV64M-NEXT: add a2, a2, a3 -; RV64M-NEXT: slli a3, a3, 2 +; RV64M-NEXT: slli a1, a1, 31 +; RV64M-NEXT: srai a1, a1, 31 ; RV64M-NEXT: add a2, a2, a3 ; RV64M-NEXT: mulh a3, a4, a5 ; RV64M-NEXT: srli a5, a3, 63 ; RV64M-NEXT: srai a3, a3, 1 ; RV64M-NEXT: add a3, a3, a5 ; RV64M-NEXT: slli a5, a3, 3 -; RV64M-NEXT: add a3, a4, a3 ; RV64M-NEXT: sub a3, a3, a5 +; RV64M-NEXT: add a3, a4, a3 ; RV64M-NEXT: addi a3, a3, -1 ; RV64M-NEXT: seqz a3, a3 ; RV64M-NEXT: lui a4, 699051 @@ -708,9 +708,9 @@ define void @test_srem_vec(ptr %X) nounwind { ; RV32MV-NEXT: vmv.x.s a2, v8 ; RV32MV-NEXT: andi a2, a2, 1 ; RV32MV-NEXT: slli a2, a2, 1 +; RV32MV-NEXT: or a1, a1, a2 ; RV32MV-NEXT: slli a0, a0, 2 ; RV32MV-NEXT: or a0, a1, a0 -; RV32MV-NEXT: or a0, a0, a2 ; RV32MV-NEXT: sw a0, 8(s0) ; RV32MV-NEXT: csrr a0, vlenb ; RV32MV-NEXT: slli a0, a0, 1 @@ -734,22 +734,22 @@ define void @test_srem_vec(ptr %X) nounwind { ; RV64MV-NEXT: slli a1, a1, 29 ; RV64MV-NEXT: srai a1, a1, 31 ; RV64MV-NEXT: srli a4, a3, 2 +; RV64MV-NEXT: lui a5, %hi(.LCPI3_0) +; RV64MV-NEXT: ld a5, %lo(.LCPI3_0)(a5) ; RV64MV-NEXT: slli a2, a2, 62 ; RV64MV-NEXT: or a2, a2, a4 -; RV64MV-NEXT: lui a4, %hi(.LCPI3_0) -; RV64MV-NEXT: ld a4, %lo(.LCPI3_0)(a4) ; RV64MV-NEXT: srai a2, a2, 31 -; RV64MV-NEXT: slli a3, a3, 31 -; RV64MV-NEXT: srai a3, a3, 31 -; RV64MV-NEXT: mulh a4, a2, a4 +; RV64MV-NEXT: mulh a4, a2, a5 ; RV64MV-NEXT: srli a5, a4, 63 ; RV64MV-NEXT: srai a4, a4, 1 ; RV64MV-NEXT: add a4, a4, a5 +; RV64MV-NEXT: slli a5, a4, 3 +; RV64MV-NEXT: sub a4, a4, a5 ; RV64MV-NEXT: lui a5, %hi(.LCPI3_1) ; RV64MV-NEXT: ld a5, %lo(.LCPI3_1)(a5) +; RV64MV-NEXT: slli a3, a3, 31 +; RV64MV-NEXT: srai a3, a3, 31 ; RV64MV-NEXT: add a2, a2, a4 -; RV64MV-NEXT: slli a4, a4, 3 -; RV64MV-NEXT: sub a2, a2, a4 ; RV64MV-NEXT: mulh a4, a3, a5 ; RV64MV-NEXT: srli a5, a4, 63 ; RV64MV-NEXT: add a4, a4, a5 @@ -767,8 +767,8 @@ define void @test_srem_vec(ptr %X) nounwind { ; RV64MV-NEXT: srai a2, a2, 1 ; RV64MV-NEXT: add a2, a2, a3 ; RV64MV-NEXT: slli a3, a2, 2 +; RV64MV-NEXT: add a2, a3, a2 ; RV64MV-NEXT: add a1, a1, a2 -; RV64MV-NEXT: add a1, a1, a3 ; RV64MV-NEXT: vslide1down.vx v8, v8, a1 ; RV64MV-NEXT: vslidedown.vi v8, v8, 1 ; RV64MV-NEXT: li a1, -1 diff --git a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll index 90443051d4b57..bdbe0d33e9b97 100644 --- a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll +++ b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll @@ -458,32 +458,32 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) nounwind { ; RV32IM-NEXT: add a6, a6, a7 ; RV32IM-NEXT: li a7, 95 ; RV32IM-NEXT: mul t0, a6, a7 -; RV32IM-NEXT: mulh t1, a1, a5 -; RV32IM-NEXT: add t1, t1, a1 +; RV32IM-NEXT: sub a2, a2, t0 +; RV32IM-NEXT: mulh t0, a1, a5 +; RV32IM-NEXT: add t0, t0, a1 +; RV32IM-NEXT: srli t1, t0, 31 +; RV32IM-NEXT: srai t0, t0, 6 +; RV32IM-NEXT: add t0, t0, t1 +; RV32IM-NEXT: mul t1, t0, a7 +; RV32IM-NEXT: sub a1, a1, t1 +; RV32IM-NEXT: mulh t1, a4, a5 +; RV32IM-NEXT: add t1, t1, a4 ; RV32IM-NEXT: srli t2, t1, 31 ; RV32IM-NEXT: srai t1, t1, 6 ; RV32IM-NEXT: add t1, 
t1, t2 ; RV32IM-NEXT: mul t2, t1, a7 -; RV32IM-NEXT: mulh t3, a4, a5 -; RV32IM-NEXT: add t3, t3, a4 -; RV32IM-NEXT: srli t4, t3, 31 -; RV32IM-NEXT: srai t3, t3, 6 -; RV32IM-NEXT: add t3, t3, t4 -; RV32IM-NEXT: mul t4, t3, a7 +; RV32IM-NEXT: sub a4, a4, t2 ; RV32IM-NEXT: mulh a5, a3, a5 ; RV32IM-NEXT: add a5, a5, a3 -; RV32IM-NEXT: srli t5, a5, 31 +; RV32IM-NEXT: srli t2, a5, 31 ; RV32IM-NEXT: srai a5, a5, 6 -; RV32IM-NEXT: add a5, a5, t5 +; RV32IM-NEXT: add a5, a5, t2 ; RV32IM-NEXT: mul a7, a5, a7 -; RV32IM-NEXT: add a3, a3, a5 ; RV32IM-NEXT: sub a3, a3, a7 -; RV32IM-NEXT: add a4, a4, t3 -; RV32IM-NEXT: sub a4, a4, t4 -; RV32IM-NEXT: add a1, a1, t1 -; RV32IM-NEXT: sub a1, a1, t2 +; RV32IM-NEXT: add a3, a3, a5 +; RV32IM-NEXT: add a4, a4, t1 +; RV32IM-NEXT: add a1, a1, t0 ; RV32IM-NEXT: add a2, a2, a6 -; RV32IM-NEXT: sub a2, a2, t0 ; RV32IM-NEXT: sh a2, 6(a0) ; RV32IM-NEXT: sh a1, 4(a0) ; RV32IM-NEXT: sh a4, 2(a0) @@ -575,35 +575,35 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) nounwind { ; RV64IM-NEXT: add a6, a6, a7 ; RV64IM-NEXT: li a7, 95 ; RV64IM-NEXT: mul t0, a6, a7 -; RV64IM-NEXT: mulh t1, a1, a3 -; RV64IM-NEXT: add t1, t1, a1 +; RV64IM-NEXT: subw a2, a2, t0 +; RV64IM-NEXT: mulh t0, a1, a3 +; RV64IM-NEXT: add t0, t0, a1 +; RV64IM-NEXT: srli t1, t0, 63 +; RV64IM-NEXT: srai t0, t0, 6 +; RV64IM-NEXT: add t0, t0, t1 +; RV64IM-NEXT: mul t1, t0, a7 +; RV64IM-NEXT: subw a1, a1, t1 +; RV64IM-NEXT: mulh t1, a5, a3 +; RV64IM-NEXT: add t1, t1, a5 ; RV64IM-NEXT: srli t2, t1, 63 ; RV64IM-NEXT: srai t1, t1, 6 ; RV64IM-NEXT: add t1, t1, t2 ; RV64IM-NEXT: mul t2, t1, a7 -; RV64IM-NEXT: mulh t3, a5, a3 -; RV64IM-NEXT: add t3, t3, a5 -; RV64IM-NEXT: srli t4, t3, 63 -; RV64IM-NEXT: srai t3, t3, 6 -; RV64IM-NEXT: add t3, t3, t4 -; RV64IM-NEXT: mul t4, t3, a7 +; RV64IM-NEXT: subw a5, a5, t2 ; RV64IM-NEXT: mulh a3, a4, a3 ; RV64IM-NEXT: add a3, a3, a4 -; RV64IM-NEXT: srli t5, a3, 63 +; RV64IM-NEXT: srli t2, a3, 63 ; RV64IM-NEXT: srai a3, a3, 6 -; RV64IM-NEXT: add a3, a3, t5 +; RV64IM-NEXT: add a3, a3, t2 ; RV64IM-NEXT: mul a7, a3, a7 +; RV64IM-NEXT: subw a4, a4, a7 ; RV64IM-NEXT: add a3, a4, a3 -; RV64IM-NEXT: subw a3, a3, a7 -; RV64IM-NEXT: add a5, a5, t3 -; RV64IM-NEXT: subw a4, a5, t4 -; RV64IM-NEXT: add a1, a1, t1 -; RV64IM-NEXT: subw a1, a1, t2 +; RV64IM-NEXT: add a5, a5, t1 +; RV64IM-NEXT: add a1, a1, t0 ; RV64IM-NEXT: add a2, a2, a6 -; RV64IM-NEXT: subw a2, a2, t0 ; RV64IM-NEXT: sh a2, 6(a0) ; RV64IM-NEXT: sh a1, 4(a0) -; RV64IM-NEXT: sh a4, 2(a0) +; RV64IM-NEXT: sh a5, 2(a0) ; RV64IM-NEXT: sh a3, 0(a0) ; RV64IM-NEXT: ret %1 = srem <4 x i16> %x, diff --git a/llvm/test/CodeGen/RISCV/urem-lkk.ll b/llvm/test/CodeGen/RISCV/urem-lkk.ll index f83a933c0b5c8..92aa7a5c58db5 100644 --- a/llvm/test/CodeGen/RISCV/urem-lkk.ll +++ b/llvm/test/CodeGen/RISCV/urem-lkk.ll @@ -140,8 +140,8 @@ define i32 @combine_urem_udiv(i32 %x) nounwind { ; RV32IM-NEXT: srli a1, a1, 6 ; RV32IM-NEXT: li a2, 95 ; RV32IM-NEXT: mul a2, a1, a2 -; RV32IM-NEXT: add a0, a0, a1 ; RV32IM-NEXT: sub a0, a0, a2 +; RV32IM-NEXT: add a0, a0, a1 ; RV32IM-NEXT: ret ; ; RV64I-LABEL: combine_urem_udiv: @@ -180,8 +180,8 @@ define i32 @combine_urem_udiv(i32 %x) nounwind { ; RV64IM-NEXT: srli a1, a1, 6 ; RV64IM-NEXT: li a2, 95 ; RV64IM-NEXT: mul a2, a1, a2 -; RV64IM-NEXT: add a0, a0, a1 ; RV64IM-NEXT: subw a0, a0, a2 +; RV64IM-NEXT: addw a0, a0, a1 ; RV64IM-NEXT: ret %1 = urem i32 %x, 95 %2 = udiv i32 %x, 95 diff --git a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll index 
5a5ae66b5fa76..62a82fdd54f69 100644 --- a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll @@ -192,8 +192,8 @@ define i1 @test_urem_odd_setne(i4 %X) nounwind { ; RV32-LABEL: test_urem_odd_setne: ; RV32: # %bb.0: ; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 ; RV32-NEXT: neg a0, a0 -; RV32-NEXT: sub a0, a0, a1 ; RV32-NEXT: andi a0, a0, 15 ; RV32-NEXT: sltiu a0, a0, 4 ; RV32-NEXT: xori a0, a0, 1 @@ -202,8 +202,8 @@ define i1 @test_urem_odd_setne(i4 %X) nounwind { ; RV64-LABEL: test_urem_odd_setne: ; RV64: # %bb.0: ; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 ; RV64-NEXT: negw a0, a0 -; RV64-NEXT: subw a0, a0, a1 ; RV64-NEXT: andi a0, a0, 15 ; RV64-NEXT: sltiu a0, a0, 4 ; RV64-NEXT: xori a0, a0, 1 @@ -212,8 +212,8 @@ define i1 @test_urem_odd_setne(i4 %X) nounwind { ; RV32M-LABEL: test_urem_odd_setne: ; RV32M: # %bb.0: ; RV32M-NEXT: slli a1, a0, 1 +; RV32M-NEXT: add a0, a1, a0 ; RV32M-NEXT: neg a0, a0 -; RV32M-NEXT: sub a0, a0, a1 ; RV32M-NEXT: andi a0, a0, 15 ; RV32M-NEXT: sltiu a0, a0, 4 ; RV32M-NEXT: xori a0, a0, 1 @@ -222,8 +222,8 @@ define i1 @test_urem_odd_setne(i4 %X) nounwind { ; RV64M-LABEL: test_urem_odd_setne: ; RV64M: # %bb.0: ; RV64M-NEXT: slli a1, a0, 1 +; RV64M-NEXT: add a0, a1, a0 ; RV64M-NEXT: negw a0, a0 -; RV64M-NEXT: subw a0, a0, a1 ; RV64M-NEXT: andi a0, a0, 15 ; RV64M-NEXT: sltiu a0, a0, 4 ; RV64M-NEXT: xori a0, a0, 1 @@ -232,8 +232,8 @@ define i1 @test_urem_odd_setne(i4 %X) nounwind { ; RV32MV-LABEL: test_urem_odd_setne: ; RV32MV: # %bb.0: ; RV32MV-NEXT: slli a1, a0, 1 +; RV32MV-NEXT: add a0, a1, a0 ; RV32MV-NEXT: neg a0, a0 -; RV32MV-NEXT: sub a0, a0, a1 ; RV32MV-NEXT: andi a0, a0, 15 ; RV32MV-NEXT: sltiu a0, a0, 4 ; RV32MV-NEXT: xori a0, a0, 1 @@ -242,8 +242,8 @@ define i1 @test_urem_odd_setne(i4 %X) nounwind { ; RV64MV-LABEL: test_urem_odd_setne: ; RV64MV: # %bb.0: ; RV64MV-NEXT: slli a1, a0, 1 +; RV64MV-NEXT: add a0, a1, a0 ; RV64MV-NEXT: negw a0, a0 -; RV64MV-NEXT: subw a0, a0, a1 ; RV64MV-NEXT: andi a0, a0, 15 ; RV64MV-NEXT: sltiu a0, a0, 4 ; RV64MV-NEXT: xori a0, a0, 1 @@ -366,9 +366,9 @@ define void @test_urem_vec(ptr %X) nounwind { ; RV32-NEXT: andi a1, s3, 2047 ; RV32-NEXT: andi a0, a0, 2047 ; RV32-NEXT: slli a0, a0, 11 +; RV32-NEXT: or a0, a1, a0 ; RV32-NEXT: slli s1, s1, 22 ; RV32-NEXT: or a0, a0, s1 -; RV32-NEXT: or a0, a1, a0 ; RV32-NEXT: sw a0, 0(s0) ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -420,8 +420,8 @@ define void @test_urem_vec(ptr %X) nounwind { ; RV64-NEXT: andi a1, s3, 2047 ; RV64-NEXT: andi a2, s2, 2047 ; RV64-NEXT: slli a2, a2, 11 +; RV64-NEXT: or a1, a1, a2 ; RV64-NEXT: slli a0, a0, 22 -; RV64-NEXT: or a0, a2, a0 ; RV64-NEXT: or a0, a1, a0 ; RV64-NEXT: sw a0, 0(s0) ; RV64-NEXT: slli a0, a0, 31 @@ -471,8 +471,8 @@ define void @test_urem_vec(ptr %X) nounwind { ; RV32M-NEXT: andi a2, a2, 2047 ; RV32M-NEXT: andi a3, a3, 2047 ; RV32M-NEXT: slli a3, a3, 11 +; RV32M-NEXT: or a2, a2, a3 ; RV32M-NEXT: slli a1, a1, 22 -; RV32M-NEXT: or a1, a3, a1 ; RV32M-NEXT: or a1, a2, a1 ; RV32M-NEXT: sw a1, 0(a0) ; RV32M-NEXT: ret @@ -510,8 +510,8 @@ define void @test_urem_vec(ptr %X) nounwind { ; RV64M-NEXT: andi a1, a1, 2047 ; RV64M-NEXT: andi a3, a3, 2047 ; RV64M-NEXT: slli a3, a3, 11 +; RV64M-NEXT: or a1, a1, a3 ; RV64M-NEXT: slli a2, a2, 22 -; RV64M-NEXT: or a2, a3, a2 ; RV64M-NEXT: or a1, a1, a2 ; RV64M-NEXT: sw a1, 0(a0) ; RV64M-NEXT: slli a1, a1, 31 @@ -575,9 +575,9 @@ define void @test_urem_vec(ptr %X) nounwind { ; 
RV32MV-NEXT: vmv.x.s a3, v8 ; RV32MV-NEXT: andi a3, a3, 2047 ; RV32MV-NEXT: slli a3, a3, 11 +; RV32MV-NEXT: or a2, a2, a3 ; RV32MV-NEXT: slli a1, a1, 22 ; RV32MV-NEXT: or a1, a2, a1 -; RV32MV-NEXT: or a1, a1, a3 ; RV32MV-NEXT: sw a1, 0(a0) ; RV32MV-NEXT: ret ; @@ -631,10 +631,10 @@ define void @test_urem_vec(ptr %X) nounwind { ; RV64MV-NEXT: vmv.x.s a2, v9 ; RV64MV-NEXT: andi a2, a2, 2047 ; RV64MV-NEXT: slli a2, a2, 11 +; RV64MV-NEXT: or a1, a1, a2 ; RV64MV-NEXT: vslidedown.vi v8, v8, 2 -; RV64MV-NEXT: vmv.x.s a3, v8 -; RV64MV-NEXT: slli a3, a3, 22 -; RV64MV-NEXT: or a1, a1, a3 +; RV64MV-NEXT: vmv.x.s a2, v8 +; RV64MV-NEXT: slli a2, a2, 22 ; RV64MV-NEXT: or a1, a1, a2 ; RV64MV-NEXT: sw a1, 0(a0) ; RV64MV-NEXT: slli a1, a1, 31 diff --git a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll index b0e790ed60635..2a1b0c5bb794d 100644 --- a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll +++ b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll @@ -397,20 +397,20 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) nounwind { ; RV32IM-NEXT: mulhu a6, a2, a5 ; RV32IM-NEXT: li a7, 95 ; RV32IM-NEXT: mul t0, a6, a7 -; RV32IM-NEXT: mulhu t1, a1, a5 +; RV32IM-NEXT: sub a2, a2, t0 +; RV32IM-NEXT: mulhu t0, a1, a5 +; RV32IM-NEXT: mul t1, t0, a7 +; RV32IM-NEXT: sub a1, a1, t1 +; RV32IM-NEXT: mulhu t1, a4, a5 ; RV32IM-NEXT: mul t2, t1, a7 -; RV32IM-NEXT: mulhu t3, a4, a5 -; RV32IM-NEXT: mul t4, t3, a7 +; RV32IM-NEXT: sub a4, a4, t2 ; RV32IM-NEXT: mulhu a5, a3, a5 ; RV32IM-NEXT: mul a7, a5, a7 -; RV32IM-NEXT: add a3, a3, a5 ; RV32IM-NEXT: sub a3, a3, a7 -; RV32IM-NEXT: add a4, a4, t3 -; RV32IM-NEXT: sub a4, a4, t4 -; RV32IM-NEXT: add a1, a1, t1 -; RV32IM-NEXT: sub a1, a1, t2 +; RV32IM-NEXT: add a3, a3, a5 +; RV32IM-NEXT: add a4, a4, t1 +; RV32IM-NEXT: add a1, a1, t0 ; RV32IM-NEXT: add a2, a2, a6 -; RV32IM-NEXT: sub a2, a2, t0 ; RV32IM-NEXT: sh a2, 6(a0) ; RV32IM-NEXT: sh a1, 4(a0) ; RV32IM-NEXT: sh a4, 2(a0) @@ -498,23 +498,23 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) nounwind { ; RV64IM-NEXT: mulhu a6, a2, a3 ; RV64IM-NEXT: li a7, 95 ; RV64IM-NEXT: mul t0, a6, a7 -; RV64IM-NEXT: mulhu t1, a1, a3 +; RV64IM-NEXT: subw a2, a2, t0 +; RV64IM-NEXT: mulhu t0, a1, a3 +; RV64IM-NEXT: mul t1, t0, a7 +; RV64IM-NEXT: subw a1, a1, t1 +; RV64IM-NEXT: mulhu t1, a5, a3 ; RV64IM-NEXT: mul t2, t1, a7 -; RV64IM-NEXT: mulhu t3, a5, a3 -; RV64IM-NEXT: mul t4, t3, a7 +; RV64IM-NEXT: subw a5, a5, t2 ; RV64IM-NEXT: mulhu a3, a4, a3 ; RV64IM-NEXT: mul a7, a3, a7 +; RV64IM-NEXT: subw a4, a4, a7 ; RV64IM-NEXT: add a3, a4, a3 -; RV64IM-NEXT: subw a3, a3, a7 -; RV64IM-NEXT: add a5, a5, t3 -; RV64IM-NEXT: subw a4, a5, t4 -; RV64IM-NEXT: add a1, a1, t1 -; RV64IM-NEXT: subw a1, a1, t2 +; RV64IM-NEXT: add a5, a5, t1 +; RV64IM-NEXT: add a1, a1, t0 ; RV64IM-NEXT: add a2, a2, a6 -; RV64IM-NEXT: subw a2, a2, t0 ; RV64IM-NEXT: sh a2, 6(a0) ; RV64IM-NEXT: sh a1, 4(a0) -; RV64IM-NEXT: sh a4, 2(a0) +; RV64IM-NEXT: sh a5, 2(a0) ; RV64IM-NEXT: sh a3, 0(a0) ; RV64IM-NEXT: ret %1 = urem <4 x i16> %x, diff --git a/llvm/test/CodeGen/RISCV/xaluo.ll b/llvm/test/CodeGen/RISCV/xaluo.ll index b1efe53290e8e..de046ffb1ce09 100644 --- a/llvm/test/CodeGen/RISCV/xaluo.ll +++ b/llvm/test/CodeGen/RISCV/xaluo.ll @@ -5155,8 +5155,8 @@ define zeroext i1 @smulo2.br.i64(i64 %v1) { ; RV32-NEXT: add a4, a4, t1 ; RV32-NEXT: sltu a5, t3, a5 ; RV32-NEXT: mulh a2, t2, a2 -; RV32-NEXT: add a0, a0, a1 ; RV32-NEXT: sub a0, t0, a0 +; RV32-NEXT: sub a0, a0, a1 ; RV32-NEXT: add a0, a0, a2 ; RV32-NEXT: add a0, a0, a5 ; RV32-NEXT: add a0, a4, a0 
@@ -5217,8 +5217,8 @@ define zeroext i1 @smulo2.br.i64(i64 %v1) { ; RV32ZBA-NEXT: add a4, a4, t1 ; RV32ZBA-NEXT: sltu a5, t3, a5 ; RV32ZBA-NEXT: mulh a2, t2, a2 -; RV32ZBA-NEXT: add a0, a0, a1 ; RV32ZBA-NEXT: sub a0, t0, a0 +; RV32ZBA-NEXT: sub a0, a0, a1 ; RV32ZBA-NEXT: add a0, a0, a2 ; RV32ZBA-NEXT: add a0, a0, a5 ; RV32ZBA-NEXT: add a0, a4, a0 @@ -5279,8 +5279,8 @@ define zeroext i1 @smulo2.br.i64(i64 %v1) { ; RV32ZICOND-NEXT: add a4, a4, t1 ; RV32ZICOND-NEXT: sltu a5, t3, a5 ; RV32ZICOND-NEXT: mulh a2, t2, a2 -; RV32ZICOND-NEXT: add a0, a0, a1 ; RV32ZICOND-NEXT: sub a0, t0, a0 +; RV32ZICOND-NEXT: sub a0, a0, a1 ; RV32ZICOND-NEXT: add a0, a0, a2 ; RV32ZICOND-NEXT: add a0, a0, a5 ; RV32ZICOND-NEXT: add a0, a4, a0 diff --git a/llvm/test/CodeGen/RISCV/xtheadmac.ll b/llvm/test/CodeGen/RISCV/xtheadmac.ll index 992c88e3e6268..3d48e7675be70 100644 --- a/llvm/test/CodeGen/RISCV/xtheadmac.ll +++ b/llvm/test/CodeGen/RISCV/xtheadmac.ll @@ -43,8 +43,8 @@ define i64 @mula_i64(i64 %a, i64 %b, i64 %c) { ; RV32XTHEADMAC-NEXT: mv a3, a0 ; RV32XTHEADMAC-NEXT: th.mula a3, a2, a4 ; RV32XTHEADMAC-NEXT: sltu a0, a3, a0 -; RV32XTHEADMAC-NEXT: add a0, a1, a0 -; RV32XTHEADMAC-NEXT: add a1, a0, a6 +; RV32XTHEADMAC-NEXT: add a1, a1, a6 +; RV32XTHEADMAC-NEXT: add a1, a1, a0 ; RV32XTHEADMAC-NEXT: mv a0, a3 ; RV32XTHEADMAC-NEXT: ret ; @@ -102,8 +102,8 @@ define i64 @muls_i64(i64 %a, i64 %b, i64 %c) { ; RV32XTHEADMAC-NEXT: mul a3, a2, a4 ; RV32XTHEADMAC-NEXT: sltu a3, a0, a3 ; RV32XTHEADMAC-NEXT: th.muls a0, a2, a4 -; RV32XTHEADMAC-NEXT: sub a1, a1, a3 ; RV32XTHEADMAC-NEXT: sub a1, a1, a6 +; RV32XTHEADMAC-NEXT: sub a1, a1, a3 ; RV32XTHEADMAC-NEXT: ret ; ; RV64XTHEADMAC-LABEL: muls_i64: diff --git a/llvm/test/CodeGen/RISCV/xtheadmemidx.ll b/llvm/test/CodeGen/RISCV/xtheadmemidx.ll index 46aa383866e93..09b8d7fce5855 100644 --- a/llvm/test/CodeGen/RISCV/xtheadmemidx.ll +++ b/llvm/test/CodeGen/RISCV/xtheadmemidx.ll @@ -324,8 +324,8 @@ define ptr @ldib(ptr %base, i64 %a) { ; RV32XTHEADMEMIDX-NEXT: lw a4, 4(a0) ; RV32XTHEADMEMIDX-NEXT: add a1, a3, a1 ; RV32XTHEADMEMIDX-NEXT: sltu a3, a1, a3 -; RV32XTHEADMEMIDX-NEXT: add a2, a2, a3 ; RV32XTHEADMEMIDX-NEXT: add a2, a4, a2 +; RV32XTHEADMEMIDX-NEXT: add a2, a2, a3 ; RV32XTHEADMEMIDX-NEXT: sw a1, 8(a0) ; RV32XTHEADMEMIDX-NEXT: sw a2, 12(a0) ; RV32XTHEADMEMIDX-NEXT: ret diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll index 51ffeca52a665..52fc3bc47b613 100644 --- a/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll @@ -2004,8 +2004,8 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) nounwin ; X64-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x64,0xc1] ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] ; X64-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8] -; X64-NEXT: addq %rdi, %rax # encoding: [0x48,0x01,0xf8] ; X64-NEXT: addq %rdx, %rax # encoding: [0x48,0x01,0xd0] +; X64-NEXT: addq %rdi, %rax # encoding: [0x48,0x01,0xf8] ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask) @@ -2179,8 +2179,8 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m ; X64-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x06] ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] ; X64-NEXT: 
addq %rcx, %rax # encoding: [0x48,0x01,0xc8] -; X64-NEXT: addq %rdi, %rax # encoding: [0x48,0x01,0xf8] ; X64-NEXT: addq %rdx, %rax # encoding: [0x48,0x01,0xd0] +; X64-NEXT: addq %rdi, %rax # encoding: [0x48,0x01,0xf8] ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask) @@ -2287,8 +2287,8 @@ define i32 @test_mask_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) nounw ; X86-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x65,0xc1] ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] ; X86-NEXT: addl %edx, %eax # encoding: [0x01,0xd0] -; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] ; X86-NEXT: addl %esi, %eax # encoding: [0x01,0xf0] +; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] ; X86-NEXT: popl %esi # encoding: [0x5e] ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X86-NEXT: retl # encoding: [0xc3] @@ -2312,8 +2312,8 @@ define i32 @test_mask_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) nounw ; X64-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x65,0xc1] ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] ; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] -; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8] ; X64-NEXT: addl %edx, %eax # encoding: [0x01,0xd0] +; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8] ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask) @@ -2420,8 +2420,8 @@ define i32 @test_mask_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) noun ; X86-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x06] ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] ; X86-NEXT: addl %edx, %eax # encoding: [0x01,0xd0] -; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] ; X86-NEXT: addl %esi, %eax # encoding: [0x01,0xf0] +; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] ; X86-NEXT: popl %esi # encoding: [0x5e] ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X86-NEXT: retl # encoding: [0xc3] @@ -2445,8 +2445,8 @@ define i32 @test_mask_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) noun ; X64-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x06] ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] ; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] -; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8] ; X64-NEXT: addl %edx, %eax # encoding: [0x01,0xd0] +; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8] ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask) diff --git a/llvm/test/CodeGen/X86/bitcast-and-setcc-256.ll b/llvm/test/CodeGen/X86/bitcast-and-setcc-256.ll index 234c7a0a500d3..34ef23db34575 100644 --- a/llvm/test/CodeGen/X86/bitcast-and-setcc-256.ll +++ b/llvm/test/CodeGen/X86/bitcast-and-setcc-256.ll @@ -553,8 +553,8 @@ define i8 @v8i32_or_select(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2, <8 x i32 ; AVX1-NEXT: vpcmpeqd %xmm5, %xmm4, %xmm4 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0 -; AVX1-NEXT: vorps %ymm0, %ymm3, %ymm0 -; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0 +; AVX1-NEXT: vorps %ymm2, %ymm3, %ymm1 +; AVX1-NEXT: vorps %ymm0, %ymm1, 
%ymm0 ; AVX1-NEXT: vmovmskps %ymm0, %eax ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper @@ -571,8 +571,8 @@ define i8 @v8i32_or_select(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2, <8 x i32 ; AVX2-NEXT: vpcmpeqd %ymm2, %ymm0, %ymm2 ; AVX2-NEXT: .LBB7_3: ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpor %ymm0, %ymm3, %ymm0 -; AVX2-NEXT: vpor %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpor %ymm2, %ymm3, %ymm1 +; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: vmovmskps %ymm0, %eax ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll index f66d81c781fe0..f98d9c63d823a 100644 --- a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll +++ b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll @@ -124,10 +124,10 @@ define i64 @scalar_i64(i64 %x, i64 %y, ptr %divdst) nounwind { ; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: pushl %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %ebp ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: calll __divdi3 @@ -136,11 +136,11 @@ define i64 @scalar_i64(i64 %x, i64 %y, ptr %divdst) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl %ecx, 4(%edx) ; X86-NEXT: movl %eax, (%edx) -; X86-NEXT: imull %eax, %ebp -; X86-NEXT: mull %ebx -; X86-NEXT: addl %ebp, %edx -; X86-NEXT: imull %ebx, %ecx +; X86-NEXT: imull %eax, %ebx +; X86-NEXT: mull %ebp +; X86-NEXT: imull %ebp, %ecx ; X86-NEXT: addl %edx, %ecx +; X86-NEXT: addl %ebx, %ecx ; X86-NEXT: subl %eax, %esi ; X86-NEXT: sbbl %ecx, %edi ; X86-NEXT: movl %esi, %eax diff --git a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll index 6fdde0b14a984..4caea12928ef9 100644 --- a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll +++ b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll @@ -124,10 +124,10 @@ define i64 @scalar_i64(i64 %x, i64 %y, ptr %divdst) nounwind { ; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: pushl %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %ebp ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: calll __udivdi3 @@ -136,11 +136,11 @@ define i64 @scalar_i64(i64 %x, i64 %y, ptr %divdst) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl %ecx, 4(%edx) ; X86-NEXT: movl %eax, (%edx) -; X86-NEXT: imull %eax, %ebp -; X86-NEXT: mull %ebx -; X86-NEXT: addl %ebp, %edx -; X86-NEXT: imull %ebx, %ecx +; X86-NEXT: imull %eax, %ebx +; X86-NEXT: mull %ebp +; X86-NEXT: imull %ebp, %ecx ; X86-NEXT: addl %edx, %ecx +; X86-NEXT: addl %ebx, %ecx ; X86-NEXT: subl %eax, %esi ; X86-NEXT: sbbl %ecx, %edi ; X86-NEXT: movl %esi, %eax @@ -180,114 +180,115 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: pushl %esi ; X86-NEXT: andl $-16, %esp ; X86-NEXT: subl $160, %esp -; X86-NEXT: movl 28(%ebp), %ebx -; X86-NEXT: movl 40(%ebp), %esi +; X86-NEXT: movl 28(%ebp), %ecx ; X86-NEXT: movl 32(%ebp), %edi ; X86-NEXT: movl %edi, %eax -; X86-NEXT: orl %esi, %eax -; X86-NEXT: movl %ebx, %ecx +; 
X86-NEXT: orl 40(%ebp), %eax ; X86-NEXT: orl 36(%ebp), %ecx ; X86-NEXT: orl %eax, %ecx -; X86-NEXT: sete %cl +; X86-NEXT: sete %bl ; X86-NEXT: movl 16(%ebp), %eax ; X86-NEXT: orl 24(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %esi ; X86-NEXT: movl 12(%ebp), %edx -; X86-NEXT: orl 20(%ebp), %edx +; X86-NEXT: orl %esi, %edx ; X86-NEXT: orl %eax, %edx ; X86-NEXT: sete %al -; X86-NEXT: orb %cl, %al +; X86-NEXT: orb %bl, %al ; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X86-NEXT: bsrl %esi, %edx +; X86-NEXT: movl 40(%ebp), %eax +; X86-NEXT: bsrl %eax, %edx ; X86-NEXT: xorl $31, %edx ; X86-NEXT: bsrl 36(%ebp), %ecx ; X86-NEXT: xorl $31, %ecx ; X86-NEXT: orl $32, %ecx -; X86-NEXT: testl %esi, %esi +; X86-NEXT: testl %eax, %eax ; X86-NEXT: cmovnel %edx, %ecx ; X86-NEXT: bsrl %edi, %edx ; X86-NEXT: xorl $31, %edx -; X86-NEXT: bsrl %ebx, %eax -; X86-NEXT: xorl $31, %eax -; X86-NEXT: orl $32, %eax +; X86-NEXT: bsrl 28(%ebp), %ebx +; X86-NEXT: xorl $31, %ebx +; X86-NEXT: orl $32, %ebx ; X86-NEXT: testl %edi, %edi -; X86-NEXT: cmovnel %edx, %eax -; X86-NEXT: orl $64, %eax -; X86-NEXT: movl 36(%ebp), %edx -; X86-NEXT: orl %esi, %edx -; X86-NEXT: cmovnel %ecx, %eax -; X86-NEXT: movl 24(%ebp), %ebx -; X86-NEXT: bsrl %ebx, %edx +; X86-NEXT: movl 36(%ebp), %edi +; X86-NEXT: cmovnel %edx, %ebx +; X86-NEXT: orl $64, %ebx +; X86-NEXT: movl %edi, %edx +; X86-NEXT: orl %eax, %edx +; X86-NEXT: cmovnel %ecx, %ebx +; X86-NEXT: movl 24(%ebp), %eax +; X86-NEXT: bsrl %eax, %edx ; X86-NEXT: xorl $31, %edx -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: bsrl %ecx, %ecx +; X86-NEXT: movl %esi, %edi +; X86-NEXT: bsrl %esi, %ecx ; X86-NEXT: xorl $31, %ecx ; X86-NEXT: orl $32, %ecx -; X86-NEXT: testl %ebx, %ebx +; X86-NEXT: testl %eax, %eax ; X86-NEXT: cmovnel %edx, %ecx -; X86-NEXT: movl 16(%ebp), %edi -; X86-NEXT: bsrl %edi, %esi +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: bsrl %eax, %esi ; X86-NEXT: xorl $31, %esi ; X86-NEXT: bsrl 12(%ebp), %edx ; X86-NEXT: xorl $31, %edx ; X86-NEXT: orl $32, %edx -; X86-NEXT: testl %edi, %edi +; X86-NEXT: testl %eax, %eax ; X86-NEXT: cmovnel %esi, %edx ; X86-NEXT: orl $64, %edx -; X86-NEXT: movl 20(%ebp), %edi ; X86-NEXT: movl %edi, %esi -; X86-NEXT: orl %ebx, %esi +; X86-NEXT: orl 24(%ebp), %esi ; X86-NEXT: cmovnel %ecx, %edx -; X86-NEXT: subl %edx, %eax +; X86-NEXT: subl %edx, %ebx +; X86-NEXT: movl $0, %edi +; X86-NEXT: sbbl %edi, %edi ; X86-NEXT: movl $0, %edx ; X86-NEXT: sbbl %edx, %edx -; X86-NEXT: movl $0, %ebx -; X86-NEXT: sbbl %ebx, %ebx ; X86-NEXT: movl $0, %esi ; X86-NEXT: sbbl %esi, %esi ; X86-NEXT: movl $127, %ecx -; X86-NEXT: cmpl %eax, %ecx +; X86-NEXT: cmpl %ebx, %ecx ; X86-NEXT: movl $0, %ecx -; X86-NEXT: sbbl %edx, %ecx +; X86-NEXT: sbbl %edi, %ecx ; X86-NEXT: movl $0, %ecx -; X86-NEXT: sbbl %ebx, %ecx +; X86-NEXT: sbbl %edx, %ecx ; X86-NEXT: movl $0, %ecx ; X86-NEXT: sbbl %esi, %ecx ; X86-NEXT: setb %cl ; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: xorl $127, %eax ; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: orl %ebx, %eax +; X86-NEXT: movl %ebx, %eax +; X86-NEXT: xorl $127, %eax ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: orl %edx, %eax +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: orl %esi, %edx -; X86-NEXT: orl %eax, %edx -; X86-NEXT: sete %al +; X86-NEXT: orl %esi, %edi +; X86-NEXT: orl %eax, %edi 
+; X86-NEXT: sete %ch ; X86-NEXT: testb %cl, %cl -; X86-NEXT: movb %cl, %ah -; X86-NEXT: movl 24(%ebp), %ebx -; X86-NEXT: movl $0, %esi -; X86-NEXT: cmovnel %esi, %ebx -; X86-NEXT: movl %edi, %ecx -; X86-NEXT: cmovnel %esi, %ecx -; X86-NEXT: movl $0, %edx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 24(%ebp), %edx +; X86-NEXT: movl $0, %edi +; X86-NEXT: cmovnel %edi, %edx +; X86-NEXT: movl 20(%ebp), %eax +; X86-NEXT: cmovnel %edi, %eax ; X86-NEXT: movl 16(%ebp), %esi -; X86-NEXT: cmovnel %edx, %esi +; X86-NEXT: cmovnel %edi, %esi ; X86-NEXT: movl 12(%ebp), %edi +; X86-NEXT: movl $0, %ebx +; X86-NEXT: cmovnel %ebx, %edi +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: orb %cl, %ch ; X86-NEXT: movl %edi, %ecx -; X86-NEXT: cmovnel %edx, %ecx -; X86-NEXT: orb %ah, %al ; X86-NEXT: movl 44(%ebp), %eax ; X86-NEXT: jne .LBB4_7 ; X86-NEXT: # %bb.1: # %udiv-bb1 -; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl 12(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: xorps %xmm0, %xmm0 ; X86-NEXT: movaps %xmm0, {{[0-9]+}}(%esp) ; X86-NEXT: movl 16(%ebp), %eax ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 20(%ebp), %edx -; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl 20(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: movl 24(%ebp), %eax ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload @@ -298,100 +299,101 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: andb $12, %al ; X86-NEXT: negb %al ; X86-NEXT: movsbl %al, %eax -; X86-NEXT: movl 136(%esp,%eax), %edi +; X86-NEXT: movl 136(%esp,%eax), %ebx ; X86-NEXT: movl 140(%esp,%eax), %esi -; X86-NEXT: shldl %cl, %edi, %esi +; X86-NEXT: shldl %cl, %ebx, %esi ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 128(%esp,%eax), %ebx -; X86-NEXT: movl 132(%esp,%eax), %eax -; X86-NEXT: shldl %cl, %eax, %edi -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %eax, %edi -; X86-NEXT: shldl %cl, %ebx, %edi -; X86-NEXT: shll %cl, %ebx -; X86-NEXT: movl %ebx, %ecx +; X86-NEXT: movl 128(%esp,%eax), %edi +; X86-NEXT: movl 132(%esp,%eax), %esi +; X86-NEXT: shldl %cl, %esi, %ebx +; X86-NEXT: shldl %cl, %edi, %esi +; X86-NEXT: shll %cl, %edi +; X86-NEXT: movl %edi, %ecx ; X86-NEXT: addl $1, %edx ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: adcl $0, %eax -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: adcl $0, %esi -; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: movl 20(%ebp), %ebx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: adcl $0, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: adcl $0, %edi ; X86-NEXT: jae .LBB4_2 ; X86-NEXT: # %bb.5: -; X86-NEXT: xorl %edx, %edx +; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X86-NEXT: xorl %eax, %eax -; X86-NEXT: movl %edi, %esi ; X86-NEXT: jmp .LBB4_6 ; X86-NEXT: .LBB4_2: # %udiv-preheader ; X86-NEXT: movaps %xmm0, {{[0-9]+}}(%esp) -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 12(%ebp), %edx -; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %edx -; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: 
movl 12(%ebp), %edi +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %edi +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl 20(%ebp), %edi +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl 24(%ebp), %eax ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: # kill: def $al killed $al killed $eax ; X86-NEXT: shrb $3, %al ; X86-NEXT: andb $12, %al ; X86-NEXT: movzbl %al, %eax -; X86-NEXT: movl 92(%esp,%eax), %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 88(%esp,%eax), %edx -; X86-NEXT: movl %edx, %ebx +; X86-NEXT: movl 92(%esp,%eax), %edi +; X86-NEXT: movl 88(%esp,%eax), %esi +; X86-NEXT: movl %esi, %edx ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-NEXT: shrdl %cl, %esi, %ebx +; X86-NEXT: shrdl %cl, %edi, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 80(%esp,%eax), %edx +; X86-NEXT: movl 84(%esp,%eax), %ebx ; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 80(%esp,%eax), %edi -; X86-NEXT: movl 84(%esp,%eax), %eax -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: movl %eax, %esi ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: shrdl %cl, %edx, %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: shrl %cl, %edx ; X86-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-NEXT: shrdl %cl, %eax, %edi -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shrdl %cl, %esi, %ebx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: shrl %cl, %edi +; X86-NEXT: movl %edi, %esi +; X86-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shrdl %cl, %eax, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl 28(%ebp), %eax ; X86-NEXT: addl $-1, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl 32(%ebp), %eax ; X86-NEXT: adcl $-1, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 36(%ebp), %esi -; X86-NEXT: adcl $-1, %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 36(%ebp), %eax +; X86-NEXT: adcl $-1, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl 40(%ebp), %eax ; X86-NEXT: adcl $-1, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: .p2align 4 ; X86-NEXT: .LBB4_3: # %udiv-do-while ; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; 
X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: shldl $1, %esi, %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X86-NEXT: shldl $1, %edi, %esi -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %ebx, %esi ; X86-NEXT: shldl $1, %ebx, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: shldl $1, %ebx, %esi +; X86-NEXT: shldl $1, %ecx, %ebx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: shldl $1, %edx, %ebx +; X86-NEXT: shldl $1, %edx, %ecx +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: shldl $1, %ecx, %edx ; X86-NEXT: orl %eax, %edx @@ -400,19 +402,15 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: shldl $1, %edx, %ecx ; X86-NEXT: orl %eax, %ecx ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: shldl $1, %ecx, %edx -; X86-NEXT: orl %eax, %edx +; X86-NEXT: addl %edx, %edx +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: addl %ecx, %ecx -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: cmpl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: sbbl %edi, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: sbbl %esi, %ecx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: sbbl %edi, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X86-NEXT: sarl $31, %ecx ; X86-NEXT: movl %ecx, %eax @@ -421,112 +419,111 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: andl 40(%ebp), %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: andl 36(%ebp), %eax ; X86-NEXT: movl %ecx, %edx -; X86-NEXT: andl 32(%ebp), %edx +; X86-NEXT: andl 36(%ebp), %edx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: andl 32(%ebp), %eax ; X86-NEXT: andl 28(%ebp), %ecx ; X86-NEXT: subl %ecx, %ebx ; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: sbbl %eax, %esi +; X86-NEXT: movl %esi, %ebx ; X86-NEXT: sbbl %edx, %edi ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: sbbl %eax, %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: addl $-1, %ecx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: adcl $-1, %eax -; X86-NEXT: adcl $-1, %ebx -; 
X86-NEXT: adcl $-1, %esi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: adcl $-1, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: adcl $-1, %edi ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: orl %esi, %eax +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: orl %edi, %eax ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: orl %ebx, %ecx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: orl %edx, %ecx ; X86-NEXT: orl %eax, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: jne .LBB4_3 ; X86-NEXT: # %bb.4: +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NEXT: .LBB4_6: # %udiv-loop-exit -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT: shldl $1, %edi, %ebx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: shldl $1, %ebx, %edx +; X86-NEXT: orl %eax, %edx +; X86-NEXT: shldl $1, %esi, %ebx ; X86-NEXT: orl %eax, %ebx -; X86-NEXT: shldl $1, %esi, %edi -; X86-NEXT: orl %eax, %edi -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: shldl $1, %ecx, %esi ; X86-NEXT: orl %eax, %esi ; X86-NEXT: addl %ecx, %ecx -; X86-NEXT: orl %edx, %ecx +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X86-NEXT: movl 44(%ebp), %eax ; X86-NEXT: .LBB4_7: # %udiv-end +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %ecx, (%eax) ; X86-NEXT: movl %esi, 4(%eax) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: movl %edx, 8(%eax) -; X86-NEXT: movl %ebx, 12(%eax) -; X86-NEXT: movl %esi, %edx -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 36(%ebp), %eax -; X86-NEXT: movl %eax, %esi -; X86-NEXT: imull %edx, %esi +; X86-NEXT: movl %ebx, 8(%eax) +; X86-NEXT: movl %edx, 12(%eax) +; X86-NEXT: movl 36(%ebp), %edi +; X86-NEXT: movl %edi, %eax +; X86-NEXT: movl %edx, %esi ; X86-NEXT: mull %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: addl %esi, %edx -; X86-NEXT: movl 40(%ebp), %edi -; X86-NEXT: imull %ecx, %edi -; X86-NEXT: addl %edx, %edi -; X86-NEXT: movl 28(%ebp), %eax -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: mull %esi -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: imull 28(%ebp), %ebx -; X86-NEXT: addl %edx, %ebx -; X86-NEXT: movl 32(%ebp), %edx -; X86-NEXT: imull %edx, %esi -; X86-NEXT: addl %ebx, %esi -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: movl %ecx, %eax ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl %edi, %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 40(%ebp), %ecx +; X86-NEXT: imull %eax, %ecx +; X86-NEXT: addl %edx, %ecx +; X86-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-NEXT: addl %edi, %ecx +; X86-NEXT: movl 28(%ebp), %eax +; X86-NEXT: mull %ebx +; X86-NEXT: movl %eax, %edi +; X86-NEXT: imull 28(%ebp), %esi +; 
X86-NEXT: addl %edx, %esi +; X86-NEXT: movl 32(%ebp), %eax +; X86-NEXT: imull %eax, %ebx +; X86-NEXT: addl %esi, %ebx +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: adcl %ecx, %ebx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X86-NEXT: movl %edi, %eax ; X86-NEXT: movl 28(%ebp), %ecx ; X86-NEXT: mull %ecx -; X86-NEXT: movl %edx, %esi +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: movl %ebx, %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: movl %esi, %eax ; X86-NEXT: mull %ecx ; X86-NEXT: movl %eax, %ecx -; X86-NEXT: addl %esi, %ecx +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X86-NEXT: adcl $0, %edx ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %edi, %eax ; X86-NEXT: mull 32(%ebp) -; X86-NEXT: movl 16(%ebp), %esi ; X86-NEXT: movl %edx, %edi ; X86-NEXT: addl %ecx, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X86-NEXT: setb %cl -; X86-NEXT: movl %ebx, %eax +; X86-NEXT: movl %esi, %eax ; X86-NEXT: mull 32(%ebp) ; X86-NEXT: addl %edi, %eax ; X86-NEXT: movzbl %cl, %ecx ; X86-NEXT: adcl %ecx, %edx ; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: adcl %ebx, %edx ; X86-NEXT: movl 12(%ebp), %ebx ; X86-NEXT: subl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X86-NEXT: movl 16(%ebp), %esi ; X86-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X86-NEXT: movl 20(%ebp), %edi ; X86-NEXT: sbbl %eax, %edi diff --git a/llvm/test/CodeGen/X86/early-ifcvt-remarks.ll b/llvm/test/CodeGen/X86/early-ifcvt-remarks.ll index 054485a358066..0dc436c8f0834 100644 --- a/llvm/test/CodeGen/X86/early-ifcvt-remarks.ll +++ b/llvm/test/CodeGen/X86/early-ifcvt-remarks.ll @@ -8,10 +8,11 @@ target triple = "x86_64-none-none" ; CHECK-SAME: and the long leg adds another {{[0-9]+}} cycles{{s?}}, ; CHECK-SAME: each staying under the threshold of {{[0-9]+}} cycles{{s?}}. -; CHECK: remark: :0:0: did not if-convert branch: -; CHECK-SAME: the condition would add {{[0-9]+}} cycles{{s?}} to the critical path, -; CHECK-SAME: and the short leg would add another {{[0-9]+}} cycles{{s?}}, -; CHECK-SAME: and the long leg would add another {{[0-9]+}} cycles{{s?}} exceeding the limit of {{[0-9]+}} cycles{{s?}}. +; CHECK: remark: :0:0: performing if-conversion on branch: +; CHECK-SAME: the condition adds {{[0-9]+}} cycle{{s?}} to the critical path, +; CHECK-SAME: and the short leg adds another {{[0-9]+}} cycles{{s?}}, +; CHECK-SAME: and the long leg adds another {{[0-9]+}} cycles{{s?}}, +; CHECK-SAME: each staying under the threshold of {{[0-9]+}} cycles{{s?}}. 
; CHECK: remark: :0:0: did not if-convert branch: ; CHECK-SAME: the resulting critical path ({{[0-9]+}} cycles{{s?}}) @@ -42,16 +43,16 @@ if.else: define i32 @mm2(i1 %pred, i32 %val, i32 %e1, i32 %e2, i32 %e3, i32 %e4, i32 %e5) { ; CHECK-LABEL: mm2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: # kill: def $r8d killed $r8d def $r8 +; CHECK-NEXT: # kill: def $ecx killed $ecx def $rcx +; CHECK-NEXT: # kill: def $edx killed $edx def $rdx +; CHECK-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-NEXT: leal (%rdx,%rsi), %eax +; CHECK-NEXT: addl %r8d, %ecx +; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: addl %r9d, %eax ; CHECK-NEXT: testb $1, %dil -; CHECK-NEXT: je .LBB1_2 -; CHECK-NEXT: # %bb.1: # %if.true -; CHECK-NEXT: addl %eax, %edx -; CHECK-NEXT: addl %ecx, %r8d -; CHECK-NEXT: addl %edx, %r8d -; CHECK-NEXT: addl %r8d, %r9d -; CHECK-NEXT: movl %r9d, %eax -; CHECK-NEXT: .LBB1_2: # %if.else +; CHECK-NEXT: cmovel %esi, %eax ; CHECK-NEXT: retq entry: br i1 %pred, label %if.true, label %if.else @@ -71,9 +72,8 @@ if.else: define i64 @mm3(i1 %pred, i64 %val, i64 %e1, i128 %e2, i128 %e3, i128 %e4, i128 %e5) { ; CHECK-LABEL: mm3: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movq %rsi, %rax ; CHECK-NEXT: testb $1, %dil -; CHECK-NEXT: movq %rsi, %r10 +; CHECK-NEXT: movq %rsi, %rax ; CHECK-NEXT: jne .LBB2_2 ; CHECK-NEXT: # %bb.1: # %if.false ; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rsi @@ -90,25 +90,25 @@ define i64 @mm3(i1 %pred, i64 %val, i64 %e1, i128 %e2, i128 %e3, i128 %e4, i128 ; CHECK-NEXT: addq %r10, %r8 ; CHECK-NEXT: addq {{[0-9]+}}(%rsp), %rax ; CHECK-NEXT: adcq {{[0-9]+}}(%rsp), %r8 +; CHECK-NEXT: xorq {{[0-9]+}}(%rsp), %rax ; CHECK-NEXT: xorq {{[0-9]+}}(%rsp), %rdi ; CHECK-NEXT: xorq %r8, %rdi -; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r10 -; CHECK-NEXT: xorq %rsi, %r10 -; CHECK-NEXT: xorq %rax, %r10 -; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: xorq %rsi, %rax +; CHECK-NEXT: movq %rdi, %rdx ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: sarq %cl, %rax +; CHECK-NEXT: sarq %cl, %rdx ; CHECK-NEXT: addq %rdi, %rdi ; CHECK-NEXT: notb %cl ; CHECK-NEXT: shlq %cl, %rdi ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: shrq %cl, %r10 -; CHECK-NEXT: orq %rdi, %r10 +; CHECK-NEXT: shrq %cl, %rax +; CHECK-NEXT: orq %rdi, %rax ; CHECK-NEXT: testb $64, %sil -; CHECK-NEXT: cmovneq %rax, %r10 -; CHECK-NEXT: movq %r9, %rax +; CHECK-NEXT: cmovneq %rdx, %rax +; CHECK-NEXT: movq %r9, %rsi ; CHECK-NEXT: .LBB2_2: # %if.endif -; CHECK-NEXT: addq %r10, %rax +; CHECK-NEXT: addq %rax, %rsi +; CHECK-NEXT: movq %rsi, %rax ; CHECK-NEXT: retq entry: br i1 %pred, label %if.true, label %if.false diff --git a/llvm/test/CodeGen/X86/fold-tied-op.ll b/llvm/test/CodeGen/X86/fold-tied-op.ll index 5ea2964057588..672e9ba8fc779 100644 --- a/llvm/test/CodeGen/X86/fold-tied-op.ll +++ b/llvm/test/CodeGen/X86/fold-tied-op.ll @@ -20,60 +20,60 @@ define i64 @fn1() #0 { ; CHECK-NEXT: pushl %ebx ; CHECK-NEXT: pushl %edi ; CHECK-NEXT: pushl %esi -; CHECK-NEXT: subl $12, %esp +; CHECK-NEXT: subl $8, %esp ; CHECK-NEXT: .cfi_offset %esi, -20 ; CHECK-NEXT: .cfi_offset %edi, -16 ; CHECK-NEXT: .cfi_offset %ebx, -12 ; CHECK-NEXT: movl $-1028477379, %ecx # imm = 0xC2B2AE3D ; CHECK-NEXT: movl $668265295, %esi # imm = 0x27D4EB4F -; CHECK-NEXT: movl a, %edi -; CHECK-NEXT: cmpl $0, (%edi) +; CHECK-NEXT: movl a, %eax +; CHECK-NEXT: cmpl $0, (%eax) ; CHECK-NEXT: je .LBB0_2 ; CHECK-NEXT: # %bb.1: # %if.then -; CHECK-NEXT: movl 8(%edi), %ecx -; CHECK-NEXT: movl 12(%edi), %edx -; CHECK-NEXT: movl %edx, %eax -; CHECK-NEXT: shldl $1, %ecx, %eax 
-; CHECK-NEXT: orl %edx, %eax -; CHECK-NEXT: leal (%ecx,%ecx), %edx +; CHECK-NEXT: movl 8(%eax), %edi +; CHECK-NEXT: movl 12(%eax), %ecx +; CHECK-NEXT: movl %ecx, %edx +; CHECK-NEXT: shldl $1, %edi, %edx ; CHECK-NEXT: orl %ecx, %edx -; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: movl 16(%edi), %ebx -; CHECK-NEXT: movl 20(%edi), %edx -; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: shldl $2, %ebx, %edx +; CHECK-NEXT: leal (%edi,%edi), %ecx +; CHECK-NEXT: orl %edi, %ecx +; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl 16(%eax), %ebx +; CHECK-NEXT: movl 20(%eax), %edi +; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: shldl $2, %ebx, %edi ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; CHECK-NEXT: shldl $31, %ebx, %ecx ; CHECK-NEXT: shll $2, %ebx ; CHECK-NEXT: orl %ecx, %ebx ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; CHECK-NEXT: shrl %ecx -; CHECK-NEXT: orl %edx, %ecx +; CHECK-NEXT: orl %edi, %ecx ; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; CHECK-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: adcl %eax, %ecx +; CHECK-NEXT: adcl %edx, %ecx ; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: movl 24(%edi), %eax -; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: movl $-1028477379, %ebx # imm = 0xC2B2AE3D -; CHECK-NEXT: imull %eax, %ebx +; CHECK-NEXT: movl 28(%eax), %ebx +; CHECK-NEXT: movl 24(%eax), %edi +; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: mull %esi ; CHECK-NEXT: movl %eax, %ecx -; CHECK-NEXT: addl %ebx, %edx -; CHECK-NEXT: movl 28(%edi), %edi -; CHECK-NEXT: imull %edi, %esi +; CHECK-NEXT: imull %ebx, %esi ; CHECK-NEXT: addl %edx, %esi -; CHECK-NEXT: movl $1336530590, %edx # imm = 0x4FA9D69E -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; CHECK-NEXT: movl %ebx, %eax -; CHECK-NEXT: mull %edx -; CHECK-NEXT: imull $-2056954758, %ebx, %ebx # imm = 0x85655C7A -; CHECK-NEXT: addl %edx, %ebx -; CHECK-NEXT: imull $1336530590, %edi, %edx # imm = 0x4FA9D69E -; CHECK-NEXT: addl %ebx, %edx +; CHECK-NEXT: movl $-1028477379, %eax # imm = 0xC2B2AE3D +; CHECK-NEXT: movl %edi, %edx +; CHECK-NEXT: imull %edi, %eax +; CHECK-NEXT: addl %eax, %esi +; CHECK-NEXT: imull $1336530590, %ebx, %edi # imm = 0x4FA9D69E +; CHECK-NEXT: imull $-2056954758, %edx, %eax # imm = 0x85655C7A +; CHECK-NEXT: addl %eax, %edi +; CHECK-NEXT: movl $1336530590, %ebx # imm = 0x4FA9D69E +; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: mull %ebx +; CHECK-NEXT: addl %edx, %edi ; CHECK-NEXT: shrdl $3, %esi, %ecx ; CHECK-NEXT: sarl $3, %esi -; CHECK-NEXT: orl %edx, %esi +; CHECK-NEXT: orl %edi, %esi ; CHECK-NEXT: orl %eax, %ecx ; CHECK-NEXT: movl $-66860409, %ebx # imm = 0xFC03CA87 ; CHECK-NEXT: movl %ecx, %eax @@ -108,7 +108,7 @@ define i64 @fn1() #0 { ; CHECK-NEXT: adcl $-2048144777, %ecx # imm = 0x85EBCA77 ; CHECK-NEXT: movl %eax, b ; CHECK-NEXT: movl %ecx, b+4 -; CHECK-NEXT: addl $12, %esp +; CHECK-NEXT: addl $8, %esp ; CHECK-NEXT: popl %esi ; CHECK-NEXT: popl %edi ; CHECK-NEXT: popl %ebx diff --git a/llvm/test/CodeGen/X86/horizontal-sum.ll b/llvm/test/CodeGen/X86/horizontal-sum.ll index 5fe1e2996ee9b..1dc7af3b77987 100644 --- a/llvm/test/CodeGen/X86/horizontal-sum.ll +++ b/llvm/test/CodeGen/X86/horizontal-sum.ll @@ -687,7 +687,7 @@ define <4 x i32> @sequential_sum_v4i32_v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i3 ; 
AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm4 = xmm2[3,3,3,3] ; AVX1-SLOW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0] ; AVX1-SLOW-NEXT: vpblendw {{.*#+}} xmm2 = xmm5[0,1,2,3],xmm2[4,5,6,7] -; AVX1-SLOW-NEXT: vpaddd %xmm1, %xmm2, %xmm1 +; AVX1-SLOW-NEXT: vpaddd %xmm2, %xmm0, %xmm0 ; AVX1-SLOW-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm3[1,1,1,1] ; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm2 = xmm3[0,0,0,0] @@ -733,7 +733,7 @@ define <4 x i32> @sequential_sum_v4i32_v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i3 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm4 = xmm2[3,3,3,3] ; AVX2-SLOW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0] ; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm2 = xmm5[0,1],xmm2[2,3] -; AVX2-SLOW-NEXT: vpaddd %xmm1, %xmm2, %xmm1 +; AVX2-SLOW-NEXT: vpaddd %xmm2, %xmm0, %xmm0 ; AVX2-SLOW-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ; AVX2-SLOW-NEXT: vpbroadcastq %xmm3, %xmm1 ; AVX2-SLOW-NEXT: vpbroadcastd %xmm3, %xmm2 diff --git a/llvm/test/CodeGen/X86/is_fpclass.ll b/llvm/test/CodeGen/X86/is_fpclass.ll index 97136dafa6c2c..3dfec807ee4a2 100644 --- a/llvm/test/CodeGen/X86/is_fpclass.ll +++ b/llvm/test/CodeGen/X86/is_fpclass.ll @@ -1910,10 +1910,10 @@ define i1 @not_iszero_or_qnan_f(float %x) { ; X86-NEXT: cmpl $8388607, %esi # imm = 0x7FFFFF ; X86-NEXT: setb %ch ; X86-NEXT: orb %cl, %ch +; X86-NEXT: orb %dl, %ch ; X86-NEXT: addl $-8388608, %eax # imm = 0xFF800000 ; X86-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 ; X86-NEXT: setb %al -; X86-NEXT: orb %dl, %al ; X86-NEXT: orb %ch, %al ; X86-NEXT: popl %esi ; X86-NEXT: .cfi_def_cfa_offset 4 @@ -1934,10 +1934,10 @@ define i1 @not_iszero_or_qnan_f(float %x) { ; X64-GENERIC-NEXT: cmpl $8388607, %esi # imm = 0x7FFFFF ; X64-GENERIC-NEXT: setb %sil ; X64-GENERIC-NEXT: orb %cl, %sil +; X64-GENERIC-NEXT: orb %dl, %sil ; X64-GENERIC-NEXT: addl $-8388608, %eax # imm = 0xFF800000 ; X64-GENERIC-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 ; X64-GENERIC-NEXT: setb %al -; X64-GENERIC-NEXT: orb %dl, %al ; X64-GENERIC-NEXT: orb %sil, %al ; X64-GENERIC-NEXT: retq ; @@ -1956,11 +1956,11 @@ define i1 @not_iszero_or_qnan_f(float %x) { ; X64-NDD-NEXT: cmpl $8388607, %esi # imm = 0x7FFFFF ; X64-NDD-NEXT: setb %sil ; X64-NDD-NEXT: orb %sil, %dl +; X64-NDD-NEXT: orb %dl, %cl ; X64-NDD-NEXT: addl $-8388608, %eax # imm = 0xFF800000 ; X64-NDD-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 ; X64-NDD-NEXT: setb %al ; X64-NDD-NEXT: orb %cl, %al -; X64-NDD-NEXT: orb %dl, %al ; X64-NDD-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 925) ; ~(0x60|0x2) = "~(zero|qnan)" @@ -2280,39 +2280,25 @@ define i1 @not_is_plus_inf_or_snan_f(float %x) { ; X86-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF ; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 ; X86-NEXT: setl %dl -; X86-NEXT: orb %cl, %dl ; X86-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 ; X86-NEXT: setge %al +; X86-NEXT: orb %cl, %al ; X86-NEXT: orb %dl, %al ; X86-NEXT: retl ; -; X64-GENERIC-LABEL: not_is_plus_inf_or_snan_f: -; X64-GENERIC: # %bb.0: -; X64-GENERIC-NEXT: movd %xmm0, %eax -; X64-GENERIC-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000 -; X64-GENERIC-NEXT: sete %cl -; X64-GENERIC-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; X64-GENERIC-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; X64-GENERIC-NEXT: setl %dl -; X64-GENERIC-NEXT: orb %cl, %dl -; X64-GENERIC-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 -; X64-GENERIC-NEXT: setge %al -; X64-GENERIC-NEXT: orb %dl, %al -; X64-GENERIC-NEXT: retq -; -; X64-NDD-LABEL: not_is_plus_inf_or_snan_f: -; X64-NDD: # 
%bb.0: -; X64-NDD-NEXT: movd %xmm0, %eax -; X64-NDD-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000 -; X64-NDD-NEXT: sete %cl -; X64-NDD-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; X64-NDD-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; X64-NDD-NEXT: setl %dl -; X64-NDD-NEXT: orb %dl, %cl -; X64-NDD-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 -; X64-NDD-NEXT: setge %al -; X64-NDD-NEXT: orb %cl, %al -; X64-NDD-NEXT: retq +; X64-LABEL: not_is_plus_inf_or_snan_f: +; X64: # %bb.0: +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000 +; X64-NEXT: sete %cl +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: setl %dl +; X64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X64-NEXT: setge %al +; X64-NEXT: orb %cl, %al +; X64-NEXT: orb %dl, %al +; X64-NEXT: retq %class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 510) ; ~(+inf|snan) ret i1 %class } @@ -2457,39 +2443,25 @@ define i1 @not_is_minus_inf_or_snan_f(float %x) { ; X86-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF ; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 ; X86-NEXT: setl %dl -; X86-NEXT: orb %cl, %dl ; X86-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 ; X86-NEXT: setge %al +; X86-NEXT: orb %cl, %al ; X86-NEXT: orb %dl, %al ; X86-NEXT: retl ; -; X64-GENERIC-LABEL: not_is_minus_inf_or_snan_f: -; X64-GENERIC: # %bb.0: -; X64-GENERIC-NEXT: movd %xmm0, %eax -; X64-GENERIC-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; X64-GENERIC-NEXT: sete %cl -; X64-GENERIC-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; X64-GENERIC-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; X64-GENERIC-NEXT: setl %dl -; X64-GENERIC-NEXT: orb %cl, %dl -; X64-GENERIC-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 -; X64-GENERIC-NEXT: setge %al -; X64-GENERIC-NEXT: orb %dl, %al -; X64-GENERIC-NEXT: retq -; -; X64-NDD-LABEL: not_is_minus_inf_or_snan_f: -; X64-NDD: # %bb.0: -; X64-NDD-NEXT: movd %xmm0, %eax -; X64-NDD-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; X64-NDD-NEXT: sete %cl -; X64-NDD-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; X64-NDD-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; X64-NDD-NEXT: setl %dl -; X64-NDD-NEXT: orb %dl, %cl -; X64-NDD-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 -; X64-NDD-NEXT: setge %al -; X64-NDD-NEXT: orb %cl, %al -; X64-NDD-NEXT: retq +; X64-LABEL: not_is_minus_inf_or_snan_f: +; X64: # %bb.0: +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: sete %cl +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: setl %dl +; X64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X64-NEXT: setge %al +; X64-NEXT: orb %cl, %al +; X64-NEXT: orb %dl, %al +; X64-NEXT: retq %class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1018) ; "~(-inf|snan)" ret i1 %class } diff --git a/llvm/test/CodeGen/X86/lea-opt-cse4.ll b/llvm/test/CodeGen/X86/lea-opt-cse4.ll index 4fa9acd99bb2f..e0495f1a6bc18 100644 --- a/llvm/test/CodeGen/X86/lea-opt-cse4.ll +++ b/llvm/test/CodeGen/X86/lea-opt-cse4.ll @@ -72,10 +72,11 @@ define void @foo_loop(ptr nocapture %ctx, i32 %n) local_unnamed_addr #0 { ; X64-NEXT: addl %eax, %ecx ; X64-NEXT: leal 1(%rax,%rcx), %ecx ; X64-NEXT: leal (%rax,%rax), %edx -; X64-NEXT: addl %eax, %edx -; X64-NEXT: addl %edx, %ecx -; X64-NEXT: addl %edx, %ecx -; X64-NEXT: movl %ecx, 16(%rdi) +; X64-NEXT: leal (%rdx,%rax), %esi +; X64-NEXT: addl %eax, %esi +; X64-NEXT: 
addl %ecx, %esi +; X64-NEXT: addl %edx, %esi +; X64-NEXT: movl %esi, 16(%rdi) ; X64-NEXT: retq ; ; X86-LABEL: foo_loop: @@ -101,10 +102,11 @@ define void @foo_loop(ptr nocapture %ctx, i32 %n) local_unnamed_addr #0 { ; X86-NEXT: addl %ecx, %esi ; X86-NEXT: leal 1(%ecx,%esi), %edx ; X86-NEXT: leal (%ecx,%ecx), %esi -; X86-NEXT: addl %ecx, %esi -; X86-NEXT: addl %esi, %edx -; X86-NEXT: addl %esi, %edx -; X86-NEXT: movl %edx, 16(%eax) +; X86-NEXT: leal (%esi,%ecx), %edi +; X86-NEXT: addl %ecx, %edi +; X86-NEXT: addl %edx, %edi +; X86-NEXT: addl %esi, %edi +; X86-NEXT: movl %edi, 16(%eax) ; X86-NEXT: popl %esi ; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: popl %edi diff --git a/llvm/test/CodeGen/X86/machine-cp.ll b/llvm/test/CodeGen/X86/machine-cp.ll index c84a1159ad56a..7e61648614a0c 100644 --- a/llvm/test/CodeGen/X86/machine-cp.ll +++ b/llvm/test/CodeGen/X86/machine-cp.ll @@ -99,54 +99,54 @@ while.end: ; preds = %while.body, %entry define <16 x float> @foo(<16 x float> %x) { ; CHECK-LABEL: foo: ; CHECK: ## %bb.0: ## %bb -; CHECK-NEXT: xorps %xmm5, %xmm5 -; CHECK-NEXT: cvttps2dq %xmm3, %xmm8 -; CHECK-NEXT: movaps %xmm3, %xmm4 -; CHECK-NEXT: cmpltps %xmm5, %xmm4 -; CHECK-NEXT: movaps {{.*#+}} xmm7 = [13,14,15,16] -; CHECK-NEXT: movaps %xmm4, %xmm6 -; CHECK-NEXT: orps %xmm7, %xmm6 -; CHECK-NEXT: cvtdq2ps %xmm8, %xmm3 +; CHECK-NEXT: xorps %xmm6, %xmm6 +; CHECK-NEXT: cvttps2dq %xmm3, %xmm4 +; CHECK-NEXT: movaps %xmm3, %xmm8 +; CHECK-NEXT: cmpltps %xmm6, %xmm8 +; CHECK-NEXT: movaps {{.*#+}} xmm5 = [13,14,15,16] +; CHECK-NEXT: movaps %xmm8, %xmm7 +; CHECK-NEXT: orps %xmm5, %xmm7 +; CHECK-NEXT: cvtdq2ps %xmm4, %xmm3 ; CHECK-NEXT: andps %xmm7, %xmm3 -; CHECK-NEXT: andps %xmm6, %xmm3 -; CHECK-NEXT: andnps %xmm4, %xmm6 +; CHECK-NEXT: andnps %xmm8, %xmm7 ; CHECK-NEXT: cvttps2dq %xmm2, %xmm4 -; CHECK-NEXT: movaps %xmm2, %xmm7 -; CHECK-NEXT: cmpltps %xmm5, %xmm7 +; CHECK-NEXT: movaps %xmm2, %xmm10 +; CHECK-NEXT: cmpltps %xmm6, %xmm10 ; CHECK-NEXT: movaps {{.*#+}} xmm8 = [9,10,11,12] -; CHECK-NEXT: movaps %xmm7, %xmm9 +; CHECK-NEXT: movaps %xmm10, %xmm9 ; CHECK-NEXT: orps %xmm8, %xmm9 ; CHECK-NEXT: cvtdq2ps %xmm4, %xmm2 -; CHECK-NEXT: andps %xmm8, %xmm2 ; CHECK-NEXT: andps %xmm9, %xmm2 -; CHECK-NEXT: andnps %xmm7, %xmm9 +; CHECK-NEXT: andnps %xmm10, %xmm9 ; CHECK-NEXT: cvttps2dq %xmm1, %xmm4 -; CHECK-NEXT: cmpltps %xmm5, %xmm1 -; CHECK-NEXT: movaps {{.*#+}} xmm7 = [5,6,7,8] -; CHECK-NEXT: movaps %xmm1, %xmm8 -; CHECK-NEXT: orps %xmm7, %xmm8 +; CHECK-NEXT: cmpltps %xmm6, %xmm1 +; CHECK-NEXT: movaps {{.*#+}} xmm10 = [5,6,7,8] +; CHECK-NEXT: movaps %xmm1, %xmm11 +; CHECK-NEXT: orps %xmm10, %xmm11 ; CHECK-NEXT: cvtdq2ps %xmm4, %xmm4 -; CHECK-NEXT: andps %xmm7, %xmm4 -; CHECK-NEXT: andps %xmm8, %xmm4 -; CHECK-NEXT: andnps %xmm1, %xmm8 +; CHECK-NEXT: andps %xmm11, %xmm4 +; CHECK-NEXT: andnps %xmm1, %xmm11 ; CHECK-NEXT: cvttps2dq %xmm0, %xmm1 -; CHECK-NEXT: cmpltps %xmm5, %xmm0 -; CHECK-NEXT: movaps {{.*#+}} xmm5 = [1,2,3,4] -; CHECK-NEXT: movaps %xmm0, %xmm7 -; CHECK-NEXT: orps %xmm5, %xmm7 +; CHECK-NEXT: cmpltps %xmm6, %xmm0 +; CHECK-NEXT: movaps {{.*#+}} xmm6 = [1,2,3,4] +; CHECK-NEXT: movaps %xmm0, %xmm12 +; CHECK-NEXT: orps %xmm6, %xmm12 ; CHECK-NEXT: cvtdq2ps %xmm1, %xmm1 -; CHECK-NEXT: andps %xmm5, %xmm1 -; CHECK-NEXT: andps %xmm7, %xmm1 -; CHECK-NEXT: andnps %xmm0, %xmm7 +; CHECK-NEXT: andps %xmm12, %xmm1 +; CHECK-NEXT: andnps %xmm0, %xmm12 +; CHECK-NEXT: andps %xmm6, %xmm1 ; CHECK-NEXT: movaps {{.*#+}} xmm0 = [1,1,1,1] -; CHECK-NEXT: andps %xmm0, %xmm7 -; CHECK-NEXT: orps %xmm7, %xmm1 -; CHECK-NEXT: andps 
%xmm0, %xmm8 -; CHECK-NEXT: orps %xmm8, %xmm4 +; CHECK-NEXT: andps %xmm0, %xmm12 +; CHECK-NEXT: orps %xmm12, %xmm1 +; CHECK-NEXT: andps %xmm10, %xmm4 +; CHECK-NEXT: andps %xmm0, %xmm11 +; CHECK-NEXT: orps %xmm11, %xmm4 +; CHECK-NEXT: andps %xmm8, %xmm2 ; CHECK-NEXT: andps %xmm0, %xmm9 ; CHECK-NEXT: orps %xmm9, %xmm2 -; CHECK-NEXT: andps %xmm0, %xmm6 -; CHECK-NEXT: orps %xmm6, %xmm3 +; CHECK-NEXT: andps %xmm0, %xmm7 +; CHECK-NEXT: andps %xmm5, %xmm3 +; CHECK-NEXT: orps %xmm7, %xmm3 ; CHECK-NEXT: movaps %xmm1, %xmm0 ; CHECK-NEXT: movaps %xmm4, %xmm1 ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/madd.ll b/llvm/test/CodeGen/X86/madd.ll index bdb7c307a5759..43ae8b1db9111 100644 --- a/llvm/test/CodeGen/X86/madd.ll +++ b/llvm/test/CodeGen/X86/madd.ll @@ -980,8 +980,8 @@ define i32 @_Z9test_charPcS_i_1024(ptr nocapture readonly, ptr nocapture readonl ; SSE2-NEXT: paddd %xmm4, %xmm3 ; SSE2-NEXT: paddd %xmm0, %xmm2 ; SSE2-NEXT: paddd %xmm0, %xmm1 -; SSE2-NEXT: paddd %xmm2, %xmm1 ; SSE2-NEXT: paddd %xmm3, %xmm1 +; SSE2-NEXT: paddd %xmm2, %xmm1 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] ; SSE2-NEXT: paddd %xmm1, %xmm0 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll index 1289eef7795dc..bc8e6dff699aa 100644 --- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll +++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll @@ -503,10 +503,10 @@ define <8 x i32> @test9(ptr %base, <8 x i64> %ind1, <8 x i32>%ind5) { ; KNL_64-NEXT: vpsrlq $32, %zmm0, %zmm0 ; KNL_64-NEXT: vpmuludq %zmm3, %zmm0, %zmm0 ; KNL_64-NEXT: vpsllq $32, %zmm0, %zmm0 -; KNL_64-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ; KNL_64-NEXT: vpmovzxdq {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero ; KNL_64-NEXT: vpmuldq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm1 -; KNL_64-NEXT: vpaddq %zmm1, %zmm0, %zmm1 +; KNL_64-NEXT: vpaddq %zmm1, %zmm2, %zmm1 +; KNL_64-NEXT: vpaddq %zmm0, %zmm1, %zmm1 ; KNL_64-NEXT: kxnorw %k0, %k0, %k1 ; KNL_64-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; KNL_64-NEXT: vpgatherqd 72(,%zmm1), %ymm0 {%k1} @@ -587,10 +587,10 @@ define <8 x i32> @test10(ptr %base, <8 x i64> %i1, <8 x i32>%ind5) { ; KNL_64-NEXT: vpsrlq $32, %zmm0, %zmm0 ; KNL_64-NEXT: vpmuludq %zmm3, %zmm0, %zmm0 ; KNL_64-NEXT: vpsllq $32, %zmm0, %zmm0 -; KNL_64-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ; KNL_64-NEXT: vpmovzxdq {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero ; KNL_64-NEXT: vpmuldq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm1 -; KNL_64-NEXT: vpaddq %zmm1, %zmm0, %zmm1 +; KNL_64-NEXT: vpaddq %zmm1, %zmm2, %zmm1 +; KNL_64-NEXT: vpaddq %zmm0, %zmm1, %zmm1 ; KNL_64-NEXT: kxnorw %k0, %k0, %k1 ; KNL_64-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; KNL_64-NEXT: vpgatherqd 72(,%zmm1), %ymm0 {%k1} diff --git a/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll b/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll index ee5fd78c64379..57e782ee66d34 100644 --- a/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll +++ b/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll @@ -1768,9 +1768,9 @@ define i1 @length48_eq_const(ptr %X) nounwind { ; X86-SSE2-NEXT: movdqu 32(%eax), %xmm2 ; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 ; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE2-NEXT: pand %xmm1, %xmm0 ; X86-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2 ; X86-SSE2-NEXT: pand %xmm0, %xmm2 +; 
X86-SSE2-NEXT: pand %xmm1, %xmm2 ; X86-SSE2-NEXT: pmovmskb %xmm2, %eax ; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF ; X86-SSE2-NEXT: setne %al @@ -1784,9 +1784,9 @@ define i1 @length48_eq_const(ptr %X) nounwind { ; X86-SSE41-NEXT: movdqu 32(%eax), %xmm2 ; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 ; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE41-NEXT: por %xmm1, %xmm0 ; X86-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2 ; X86-SSE41-NEXT: por %xmm0, %xmm2 +; X86-SSE41-NEXT: por %xmm1, %xmm2 ; X86-SSE41-NEXT: ptest %xmm2, %xmm2 ; X86-SSE41-NEXT: setne %al ; X86-SSE41-NEXT: retl diff --git a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll index a46f9ed3d3798..a52fe02972cd5 100644 --- a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll +++ b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll @@ -1598,9 +1598,9 @@ define i1 @length48_eq(ptr %x, ptr %y) nounwind { ; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm3 ; X64-SSE2-NEXT: movdqu 16(%rsi), %xmm0 ; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 -; X64-SSE2-NEXT: pand %xmm3, %xmm0 ; X64-SSE2-NEXT: movdqu 32(%rsi), %xmm1 ; X64-SSE2-NEXT: pcmpeqb %xmm2, %xmm1 +; X64-SSE2-NEXT: pand %xmm3, %xmm1 ; X64-SSE2-NEXT: pand %xmm0, %xmm1 ; X64-SSE2-NEXT: pmovmskb %xmm1, %eax ; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF @@ -1616,9 +1616,9 @@ define i1 @length48_eq(ptr %x, ptr %y) nounwind { ; X64-SSE41-NEXT: pxor %xmm0, %xmm3 ; X64-SSE41-NEXT: movdqu 16(%rsi), %xmm0 ; X64-SSE41-NEXT: pxor %xmm1, %xmm0 -; X64-SSE41-NEXT: por %xmm3, %xmm0 ; X64-SSE41-NEXT: movdqu 32(%rsi), %xmm1 ; X64-SSE41-NEXT: pxor %xmm2, %xmm1 +; X64-SSE41-NEXT: por %xmm3, %xmm1 ; X64-SSE41-NEXT: por %xmm0, %xmm1 ; X64-SSE41-NEXT: ptest %xmm1, %xmm1 ; X64-SSE41-NEXT: sete %al @@ -1720,9 +1720,9 @@ define i1 @length48_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"= ; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm3 ; X64-SSE2-NEXT: movdqu 16(%rsi), %xmm0 ; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 -; X64-SSE2-NEXT: pand %xmm3, %xmm0 ; X64-SSE2-NEXT: movdqu 32(%rsi), %xmm1 ; X64-SSE2-NEXT: pcmpeqb %xmm2, %xmm1 +; X64-SSE2-NEXT: pand %xmm3, %xmm1 ; X64-SSE2-NEXT: pand %xmm0, %xmm1 ; X64-SSE2-NEXT: pmovmskb %xmm1, %eax ; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF @@ -1738,9 +1738,9 @@ define i1 @length48_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"= ; X64-SSE41-NEXT: pxor %xmm0, %xmm3 ; X64-SSE41-NEXT: movdqu 16(%rsi), %xmm0 ; X64-SSE41-NEXT: pxor %xmm1, %xmm0 -; X64-SSE41-NEXT: por %xmm3, %xmm0 ; X64-SSE41-NEXT: movdqu 32(%rsi), %xmm1 ; X64-SSE41-NEXT: pxor %xmm2, %xmm1 +; X64-SSE41-NEXT: por %xmm3, %xmm1 ; X64-SSE41-NEXT: por %xmm0, %xmm1 ; X64-SSE41-NEXT: ptest %xmm1, %xmm1 ; X64-SSE41-NEXT: sete %al @@ -1753,8 +1753,8 @@ define i1 @length48_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"= ; X64-AVX-NEXT: vmovdqu 32(%rdi), %xmm2 ; X64-AVX-NEXT: vpxor 16(%rsi), %xmm1, %xmm1 ; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0 -; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 -; X64-AVX-NEXT: vpxor 32(%rsi), %xmm2, %xmm1 +; X64-AVX-NEXT: vpxor 32(%rsi), %xmm2, %xmm2 +; X64-AVX-NEXT: vpor %xmm2, %xmm0, %xmm0 ; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 ; X64-AVX-NEXT: vptest %xmm0, %xmm0 ; X64-AVX-NEXT: sete %al @@ -1789,9 +1789,9 @@ define i1 @length48_eq_const(ptr %X) nounwind { ; X64-SSE2-NEXT: movdqu 32(%rdi), %xmm2 ; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 ; X64-SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; X64-SSE2-NEXT: pand %xmm1, %xmm0 ; X64-SSE2-NEXT: pcmpeqb 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 ; X64-SSE2-NEXT: pand %xmm0, %xmm2 +; X64-SSE2-NEXT: pand %xmm1, %xmm2 ; X64-SSE2-NEXT: pmovmskb %xmm2, %eax ; X64-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF ; X64-SSE2-NEXT: setne %al @@ -1804,9 +1804,9 @@ define i1 @length48_eq_const(ptr %X) nounwind { ; X64-SSE41-NEXT: movdqu 32(%rdi), %xmm2 ; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 ; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; X64-SSE41-NEXT: por %xmm1, %xmm0 ; X64-SSE41-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 ; X64-SSE41-NEXT: por %xmm0, %xmm2 +; X64-SSE41-NEXT: por %xmm1, %xmm2 ; X64-SSE41-NEXT: ptest %xmm2, %xmm2 ; X64-SSE41-NEXT: setne %al ; X64-SSE41-NEXT: retq @@ -2353,8 +2353,8 @@ define i1 @length96_eq(ptr %x, ptr %y) nounwind { ; X64-AVX1-NEXT: vmovups 64(%rdi), %ymm2 ; X64-AVX1-NEXT: vxorps 32(%rsi), %ymm1, %ymm1 ; X64-AVX1-NEXT: vxorps (%rsi), %ymm0, %ymm0 -; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 -; X64-AVX1-NEXT: vxorps 64(%rsi), %ymm2, %ymm1 +; X64-AVX1-NEXT: vxorps 64(%rsi), %ymm2, %ymm2 +; X64-AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0 ; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 ; X64-AVX1-NEXT: vptest %ymm0, %ymm0 ; X64-AVX1-NEXT: setne %al @@ -2368,8 +2368,8 @@ define i1 @length96_eq(ptr %x, ptr %y) nounwind { ; X64-AVX2-NEXT: vmovdqu 64(%rdi), %ymm2 ; X64-AVX2-NEXT: vpxor 32(%rsi), %ymm1, %ymm1 ; X64-AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0 -; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 -; X64-AVX2-NEXT: vpxor 64(%rsi), %ymm2, %ymm1 +; X64-AVX2-NEXT: vpxor 64(%rsi), %ymm2, %ymm2 +; X64-AVX2-NEXT: vpor %ymm2, %ymm0, %ymm0 ; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; X64-AVX2-NEXT: vptest %ymm0, %ymm0 ; X64-AVX2-NEXT: setne %al @@ -2482,8 +2482,8 @@ define i1 @length96_eq_const(ptr %X) nounwind { ; X64-AVX1-NEXT: vmovups 64(%rdi), %ymm2 ; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 ; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 -; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm1 +; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 +; X64-AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0 ; X64-AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 ; X64-AVX1-NEXT: vptest %ymm0, %ymm0 ; X64-AVX1-NEXT: sete %al @@ -2497,8 +2497,8 @@ define i1 @length96_eq_const(ptr %X) nounwind { ; X64-AVX2-NEXT: vmovdqu 64(%rdi), %ymm2 ; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 ; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 -; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm1 +; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 +; X64-AVX2-NEXT: vpor %ymm2, %ymm0, %ymm0 ; X64-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; X64-AVX2-NEXT: vptest %ymm0, %ymm0 ; X64-AVX2-NEXT: sete %al diff --git a/llvm/test/CodeGen/X86/midpoint-int-vec-256.ll b/llvm/test/CodeGen/X86/midpoint-int-vec-256.ll index 7d882b772a64d..968e6210ccc6e 100644 --- a/llvm/test/CodeGen/X86/midpoint-int-vec-256.ll +++ b/llvm/test/CodeGen/X86/midpoint-int-vec-256.ll @@ -407,18 +407,18 @@ define <4 x i64> @vec256_i64_signed_reg_reg(<4 x i64> %a1, <4 x i64> %a2) nounwi ; AVX1-NEXT: vpaddq %xmm1, %xmm9, %xmm1 ; AVX1-NEXT: vpsllq $32, %xmm1, %xmm1 ; AVX1-NEXT: vpmuludq %xmm5, %xmm7, %xmm5 +; AVX1-NEXT: vpaddq %xmm1, %xmm5, %xmm1 ; AVX1-NEXT: vpsrlq $33, %xmm2, %xmm2 ; AVX1-NEXT: vpor %xmm4, %xmm8, %xmm4 ; AVX1-NEXT: vpmuludq %xmm4, %xmm2, %xmm2 -; AVX1-NEXT: vpsrlq $32, %xmm4, %xmm7 -; AVX1-NEXT: vpmuludq 
%xmm7, %xmm6, %xmm7 -; AVX1-NEXT: vpaddq %xmm2, %xmm7, %xmm2 +; AVX1-NEXT: vpsrlq $32, %xmm4, %xmm5 +; AVX1-NEXT: vpmuludq %xmm5, %xmm6, %xmm5 +; AVX1-NEXT: vpaddq %xmm2, %xmm5, %xmm2 ; AVX1-NEXT: vpsllq $32, %xmm2, %xmm2 ; AVX1-NEXT: vpmuludq %xmm4, %xmm6, %xmm4 ; AVX1-NEXT: vpaddq %xmm3, %xmm4, %xmm3 ; AVX1-NEXT: vpaddq %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpaddq %xmm0, %xmm5, %xmm0 -; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX1-NEXT: retq ; @@ -465,18 +465,18 @@ define <4 x i64> @vec256_i64_signed_reg_reg(<4 x i64> %a1, <4 x i64> %a2) nounwi ; XOP-NEXT: vpaddq %xmm1, %xmm9, %xmm1 ; XOP-NEXT: vpsllq $32, %xmm1, %xmm1 ; XOP-NEXT: vpmuludq %xmm5, %xmm7, %xmm5 +; XOP-NEXT: vpaddq %xmm1, %xmm5, %xmm1 ; XOP-NEXT: vpsrlq $33, %xmm2, %xmm2 ; XOP-NEXT: vpor %xmm4, %xmm8, %xmm4 ; XOP-NEXT: vpmuludq %xmm4, %xmm2, %xmm2 -; XOP-NEXT: vpsrlq $32, %xmm4, %xmm7 -; XOP-NEXT: vpmuludq %xmm7, %xmm6, %xmm7 -; XOP-NEXT: vpaddq %xmm2, %xmm7, %xmm2 +; XOP-NEXT: vpsrlq $32, %xmm4, %xmm5 +; XOP-NEXT: vpmuludq %xmm5, %xmm6, %xmm5 +; XOP-NEXT: vpaddq %xmm2, %xmm5, %xmm2 ; XOP-NEXT: vpsllq $32, %xmm2, %xmm2 ; XOP-NEXT: vpmuludq %xmm4, %xmm6, %xmm4 ; XOP-NEXT: vpaddq %xmm3, %xmm4, %xmm3 ; XOP-NEXT: vpaddq %xmm2, %xmm3, %xmm2 -; XOP-NEXT: vpaddq %xmm0, %xmm5, %xmm0 -; XOP-NEXT: vpaddq %xmm1, %xmm0, %xmm0 +; XOP-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ; XOP-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; XOP-NEXT: retq ; @@ -578,18 +578,18 @@ define <4 x i64> @vec256_i64_unsigned_reg_reg(<4 x i64> %a1, <4 x i64> %a2) noun ; AVX1-NEXT: vpaddq %xmm1, %xmm9, %xmm1 ; AVX1-NEXT: vpsllq $32, %xmm1, %xmm1 ; AVX1-NEXT: vpmuludq %xmm4, %xmm7, %xmm4 +; AVX1-NEXT: vpaddq %xmm1, %xmm4, %xmm1 ; AVX1-NEXT: vpsrlq $33, %xmm3, %xmm3 -; AVX1-NEXT: vpor %xmm5, %xmm8, %xmm5 -; AVX1-NEXT: vpmuludq %xmm5, %xmm3, %xmm3 -; AVX1-NEXT: vpsrlq $32, %xmm5, %xmm7 -; AVX1-NEXT: vpmuludq %xmm7, %xmm6, %xmm7 -; AVX1-NEXT: vpaddq %xmm3, %xmm7, %xmm3 -; AVX1-NEXT: vpsllq $32, %xmm3, %xmm3 +; AVX1-NEXT: vpor %xmm5, %xmm8, %xmm4 +; AVX1-NEXT: vpmuludq %xmm4, %xmm3, %xmm3 +; AVX1-NEXT: vpsrlq $32, %xmm4, %xmm5 ; AVX1-NEXT: vpmuludq %xmm5, %xmm6, %xmm5 -; AVX1-NEXT: vpaddq %xmm2, %xmm5, %xmm2 +; AVX1-NEXT: vpaddq %xmm3, %xmm5, %xmm3 +; AVX1-NEXT: vpsllq $32, %xmm3, %xmm3 +; AVX1-NEXT: vpmuludq %xmm4, %xmm6, %xmm4 +; AVX1-NEXT: vpaddq %xmm2, %xmm4, %xmm2 ; AVX1-NEXT: vpaddq %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vpaddq %xmm0, %xmm4, %xmm0 -; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX1-NEXT: retq ; @@ -639,18 +639,18 @@ define <4 x i64> @vec256_i64_unsigned_reg_reg(<4 x i64> %a1, <4 x i64> %a2) noun ; XOP-NEXT: vpaddq %xmm1, %xmm9, %xmm1 ; XOP-NEXT: vpsllq $32, %xmm1, %xmm1 ; XOP-NEXT: vpmuludq %xmm5, %xmm7, %xmm5 +; XOP-NEXT: vpaddq %xmm1, %xmm5, %xmm1 ; XOP-NEXT: vpsrlq $33, %xmm2, %xmm2 ; XOP-NEXT: vpor %xmm4, %xmm8, %xmm4 ; XOP-NEXT: vpmuludq %xmm4, %xmm2, %xmm2 -; XOP-NEXT: vpsrlq $32, %xmm4, %xmm7 -; XOP-NEXT: vpmuludq %xmm7, %xmm6, %xmm7 -; XOP-NEXT: vpaddq %xmm2, %xmm7, %xmm2 +; XOP-NEXT: vpsrlq $32, %xmm4, %xmm5 +; XOP-NEXT: vpmuludq %xmm5, %xmm6, %xmm5 +; XOP-NEXT: vpaddq %xmm2, %xmm5, %xmm2 ; XOP-NEXT: vpsllq $32, %xmm2, %xmm2 ; XOP-NEXT: vpmuludq %xmm4, %xmm6, %xmm4 ; XOP-NEXT: vpaddq %xmm3, %xmm4, %xmm3 ; XOP-NEXT: vpaddq %xmm2, %xmm3, %xmm2 -; XOP-NEXT: vpaddq %xmm0, %xmm5, %xmm0 -; XOP-NEXT: vpaddq %xmm1, %xmm0, %xmm0 +; XOP-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ; XOP-NEXT: vinsertf128 
$1, %xmm2, %ymm0, %ymm0 ; XOP-NEXT: retq ; @@ -727,18 +727,18 @@ define <4 x i64> @vec256_i64_unsigned_reg_reg(<4 x i64> %a1, <4 x i64> %a2) noun define <4 x i64> @vec256_i64_signed_mem_reg(ptr %a1_addr, <4 x i64> %a2) nounwind { ; AVX1-LABEL: vec256_i64_signed_mem_reg: ; AVX1: # %bb.0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vmovdqa (%rdi), %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vmovdqa (%rdi), %xmm1 ; AVX1-NEXT: vmovdqa 16(%rdi), %xmm3 -; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm4 -; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm5 -; AVX1-NEXT: vpsubq %xmm0, %xmm2, %xmm0 +; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm4 +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm5 +; AVX1-NEXT: vpsubq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsubq %xmm0, %xmm5, %xmm0 -; AVX1-NEXT: vpsubq %xmm1, %xmm3, %xmm1 -; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1 -; AVX1-NEXT: vpsubq %xmm1, %xmm4, %xmm1 -; AVX1-NEXT: vpsrlq $1, %xmm1, %xmm6 +; AVX1-NEXT: vpsubq %xmm2, %xmm3, %xmm2 +; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2 +; AVX1-NEXT: vpsubq %xmm2, %xmm4, %xmm2 +; AVX1-NEXT: vpsrlq $1, %xmm2, %xmm6 ; AVX1-NEXT: vpsrlq $1, %xmm0, %xmm7 ; AVX1-NEXT: vpsrlq $33, %xmm0, %xmm0 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm8 = [1,1] @@ -749,19 +749,19 @@ define <4 x i64> @vec256_i64_signed_mem_reg(ptr %a1_addr, <4 x i64> %a2) nounwin ; AVX1-NEXT: vpaddq %xmm0, %xmm9, %xmm0 ; AVX1-NEXT: vpsllq $32, %xmm0, %xmm0 ; AVX1-NEXT: vpmuludq %xmm5, %xmm7, %xmm5 -; AVX1-NEXT: vpsrlq $33, %xmm1, %xmm1 +; AVX1-NEXT: vpaddq %xmm0, %xmm5, %xmm0 +; AVX1-NEXT: vpsrlq $33, %xmm2, %xmm2 ; AVX1-NEXT: vpor %xmm4, %xmm8, %xmm4 -; AVX1-NEXT: vpmuludq %xmm4, %xmm1, %xmm1 -; AVX1-NEXT: vpsrlq $32, %xmm4, %xmm7 -; AVX1-NEXT: vpmuludq %xmm7, %xmm6, %xmm7 -; AVX1-NEXT: vpaddq %xmm1, %xmm7, %xmm1 -; AVX1-NEXT: vpsllq $32, %xmm1, %xmm1 +; AVX1-NEXT: vpmuludq %xmm4, %xmm2, %xmm2 +; AVX1-NEXT: vpsrlq $32, %xmm4, %xmm5 +; AVX1-NEXT: vpmuludq %xmm5, %xmm6, %xmm5 +; AVX1-NEXT: vpaddq %xmm2, %xmm5, %xmm2 +; AVX1-NEXT: vpsllq $32, %xmm2, %xmm2 ; AVX1-NEXT: vpmuludq %xmm4, %xmm6, %xmm4 ; AVX1-NEXT: vpaddq %xmm3, %xmm4, %xmm3 -; AVX1-NEXT: vpaddq %xmm1, %xmm3, %xmm1 -; AVX1-NEXT: vpaddq %xmm2, %xmm5, %xmm2 -; AVX1-NEXT: vpaddq %xmm0, %xmm2, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: vpaddq %xmm2, %xmm3, %xmm2 +; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: vec256_i64_signed_mem_reg: @@ -787,18 +787,18 @@ define <4 x i64> @vec256_i64_signed_mem_reg(ptr %a1_addr, <4 x i64> %a2) nounwin ; ; XOP-LABEL: vec256_i64_signed_mem_reg: ; XOP: # %bb.0: -; XOP-NEXT: vextractf128 $1, %ymm0, %xmm1 -; XOP-NEXT: vmovdqa (%rdi), %xmm2 +; XOP-NEXT: vextractf128 $1, %ymm0, %xmm2 +; XOP-NEXT: vmovdqa (%rdi), %xmm1 ; XOP-NEXT: vmovdqa 16(%rdi), %xmm3 -; XOP-NEXT: vpcomgtq %xmm1, %xmm3, %xmm4 -; XOP-NEXT: vpcomgtq %xmm0, %xmm2, %xmm5 -; XOP-NEXT: vpsubq %xmm0, %xmm2, %xmm0 +; XOP-NEXT: vpcomgtq %xmm2, %xmm3, %xmm4 +; XOP-NEXT: vpcomgtq %xmm0, %xmm1, %xmm5 +; XOP-NEXT: vpsubq %xmm0, %xmm1, %xmm0 ; XOP-NEXT: vpxor %xmm5, %xmm0, %xmm0 ; XOP-NEXT: vpsubq %xmm0, %xmm5, %xmm0 -; XOP-NEXT: vpsubq %xmm1, %xmm3, %xmm1 -; XOP-NEXT: vpxor %xmm4, %xmm1, %xmm1 -; XOP-NEXT: vpsubq %xmm1, %xmm4, %xmm1 -; XOP-NEXT: vpsrlq $1, %xmm1, %xmm6 +; XOP-NEXT: vpsubq %xmm2, %xmm3, %xmm2 +; XOP-NEXT: vpxor %xmm4, %xmm2, %xmm2 +; XOP-NEXT: vpsubq %xmm2, %xmm4, %xmm2 +; XOP-NEXT: vpsrlq $1, %xmm2, %xmm6 ; XOP-NEXT: vpsrlq $1, %xmm0, %xmm7 ; XOP-NEXT: vpsrlq $33, %xmm0, 
%xmm0 ; XOP-NEXT: vpmovsxbq {{.*#+}} xmm8 = [1,1] @@ -809,19 +809,19 @@ define <4 x i64> @vec256_i64_signed_mem_reg(ptr %a1_addr, <4 x i64> %a2) nounwin ; XOP-NEXT: vpaddq %xmm0, %xmm9, %xmm0 ; XOP-NEXT: vpsllq $32, %xmm0, %xmm0 ; XOP-NEXT: vpmuludq %xmm5, %xmm7, %xmm5 -; XOP-NEXT: vpsrlq $33, %xmm1, %xmm1 +; XOP-NEXT: vpaddq %xmm0, %xmm5, %xmm0 +; XOP-NEXT: vpsrlq $33, %xmm2, %xmm2 ; XOP-NEXT: vpor %xmm4, %xmm8, %xmm4 -; XOP-NEXT: vpmuludq %xmm4, %xmm1, %xmm1 -; XOP-NEXT: vpsrlq $32, %xmm4, %xmm7 -; XOP-NEXT: vpmuludq %xmm7, %xmm6, %xmm7 -; XOP-NEXT: vpaddq %xmm1, %xmm7, %xmm1 -; XOP-NEXT: vpsllq $32, %xmm1, %xmm1 +; XOP-NEXT: vpmuludq %xmm4, %xmm2, %xmm2 +; XOP-NEXT: vpsrlq $32, %xmm4, %xmm5 +; XOP-NEXT: vpmuludq %xmm5, %xmm6, %xmm5 +; XOP-NEXT: vpaddq %xmm2, %xmm5, %xmm2 +; XOP-NEXT: vpsllq $32, %xmm2, %xmm2 ; XOP-NEXT: vpmuludq %xmm4, %xmm6, %xmm4 ; XOP-NEXT: vpaddq %xmm3, %xmm4, %xmm3 -; XOP-NEXT: vpaddq %xmm1, %xmm3, %xmm1 -; XOP-NEXT: vpaddq %xmm2, %xmm5, %xmm2 -; XOP-NEXT: vpaddq %xmm0, %xmm2, %xmm0 -; XOP-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; XOP-NEXT: vpaddq %xmm2, %xmm3, %xmm2 +; XOP-NEXT: vpaddq %xmm1, %xmm0, %xmm0 +; XOP-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; XOP-NEXT: retq ; ; AVX512F-LABEL: vec256_i64_signed_mem_reg: @@ -919,18 +919,18 @@ define <4 x i64> @vec256_i64_signed_reg_mem(<4 x i64> %a1, ptr %a2_addr) nounwin ; AVX1-NEXT: vpaddq %xmm1, %xmm9, %xmm1 ; AVX1-NEXT: vpsllq $32, %xmm1, %xmm1 ; AVX1-NEXT: vpmuludq %xmm5, %xmm7, %xmm5 +; AVX1-NEXT: vpaddq %xmm1, %xmm5, %xmm1 ; AVX1-NEXT: vpsrlq $33, %xmm2, %xmm2 ; AVX1-NEXT: vpor %xmm4, %xmm8, %xmm4 ; AVX1-NEXT: vpmuludq %xmm4, %xmm2, %xmm2 -; AVX1-NEXT: vpsrlq $32, %xmm4, %xmm7 -; AVX1-NEXT: vpmuludq %xmm7, %xmm6, %xmm7 -; AVX1-NEXT: vpaddq %xmm2, %xmm7, %xmm2 +; AVX1-NEXT: vpsrlq $32, %xmm4, %xmm5 +; AVX1-NEXT: vpmuludq %xmm5, %xmm6, %xmm5 +; AVX1-NEXT: vpaddq %xmm2, %xmm5, %xmm2 ; AVX1-NEXT: vpsllq $32, %xmm2, %xmm2 ; AVX1-NEXT: vpmuludq %xmm4, %xmm6, %xmm4 ; AVX1-NEXT: vpaddq %xmm3, %xmm4, %xmm3 ; AVX1-NEXT: vpaddq %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpaddq %xmm0, %xmm5, %xmm0 -; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX1-NEXT: retq ; @@ -979,18 +979,18 @@ define <4 x i64> @vec256_i64_signed_reg_mem(<4 x i64> %a1, ptr %a2_addr) nounwin ; XOP-NEXT: vpaddq %xmm1, %xmm9, %xmm1 ; XOP-NEXT: vpsllq $32, %xmm1, %xmm1 ; XOP-NEXT: vpmuludq %xmm5, %xmm7, %xmm5 +; XOP-NEXT: vpaddq %xmm1, %xmm5, %xmm1 ; XOP-NEXT: vpsrlq $33, %xmm2, %xmm2 ; XOP-NEXT: vpor %xmm4, %xmm8, %xmm4 ; XOP-NEXT: vpmuludq %xmm4, %xmm2, %xmm2 -; XOP-NEXT: vpsrlq $32, %xmm4, %xmm7 -; XOP-NEXT: vpmuludq %xmm7, %xmm6, %xmm7 -; XOP-NEXT: vpaddq %xmm2, %xmm7, %xmm2 +; XOP-NEXT: vpsrlq $32, %xmm4, %xmm5 +; XOP-NEXT: vpmuludq %xmm5, %xmm6, %xmm5 +; XOP-NEXT: vpaddq %xmm2, %xmm5, %xmm2 ; XOP-NEXT: vpsllq $32, %xmm2, %xmm2 ; XOP-NEXT: vpmuludq %xmm4, %xmm6, %xmm4 ; XOP-NEXT: vpaddq %xmm3, %xmm4, %xmm3 ; XOP-NEXT: vpaddq %xmm2, %xmm3, %xmm2 -; XOP-NEXT: vpaddq %xmm0, %xmm5, %xmm0 -; XOP-NEXT: vpaddq %xmm1, %xmm0, %xmm0 +; XOP-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ; XOP-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; XOP-NEXT: retq ; @@ -1090,18 +1090,18 @@ define <4 x i64> @vec256_i64_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind ; AVX1-NEXT: vpaddq %xmm0, %xmm9, %xmm0 ; AVX1-NEXT: vpsllq $32, %xmm0, %xmm0 ; AVX1-NEXT: vpmuludq %xmm5, %xmm7, %xmm5 +; AVX1-NEXT: vpaddq %xmm0, %xmm5, %xmm0 ; AVX1-NEXT: vpsrlq $33, %xmm1, %xmm1 ; AVX1-NEXT: vpor %xmm4, %xmm8, %xmm4 ; 
AVX1-NEXT: vpmuludq %xmm4, %xmm1, %xmm1 -; AVX1-NEXT: vpsrlq $32, %xmm4, %xmm7 -; AVX1-NEXT: vpmuludq %xmm7, %xmm6, %xmm7 -; AVX1-NEXT: vpaddq %xmm1, %xmm7, %xmm1 +; AVX1-NEXT: vpsrlq $32, %xmm4, %xmm5 +; AVX1-NEXT: vpmuludq %xmm5, %xmm6, %xmm5 +; AVX1-NEXT: vpaddq %xmm1, %xmm5, %xmm1 ; AVX1-NEXT: vpsllq $32, %xmm1, %xmm1 ; AVX1-NEXT: vpmuludq %xmm4, %xmm6, %xmm4 ; AVX1-NEXT: vpaddq %xmm3, %xmm4, %xmm3 ; AVX1-NEXT: vpaddq %xmm1, %xmm3, %xmm1 -; AVX1-NEXT: vpaddq %xmm2, %xmm5, %xmm2 -; AVX1-NEXT: vpaddq %xmm0, %xmm2, %xmm0 +; AVX1-NEXT: vpaddq %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; @@ -1152,18 +1152,18 @@ define <4 x i64> @vec256_i64_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind ; XOP-NEXT: vpaddq %xmm0, %xmm9, %xmm0 ; XOP-NEXT: vpsllq $32, %xmm0, %xmm0 ; XOP-NEXT: vpmuludq %xmm5, %xmm7, %xmm5 +; XOP-NEXT: vpaddq %xmm0, %xmm5, %xmm0 ; XOP-NEXT: vpsrlq $33, %xmm1, %xmm1 ; XOP-NEXT: vpor %xmm4, %xmm8, %xmm4 ; XOP-NEXT: vpmuludq %xmm4, %xmm1, %xmm1 -; XOP-NEXT: vpsrlq $32, %xmm4, %xmm7 -; XOP-NEXT: vpmuludq %xmm7, %xmm6, %xmm7 -; XOP-NEXT: vpaddq %xmm1, %xmm7, %xmm1 +; XOP-NEXT: vpsrlq $32, %xmm4, %xmm5 +; XOP-NEXT: vpmuludq %xmm5, %xmm6, %xmm5 +; XOP-NEXT: vpaddq %xmm1, %xmm5, %xmm1 ; XOP-NEXT: vpsllq $32, %xmm1, %xmm1 ; XOP-NEXT: vpmuludq %xmm4, %xmm6, %xmm4 ; XOP-NEXT: vpaddq %xmm3, %xmm4, %xmm3 ; XOP-NEXT: vpaddq %xmm1, %xmm3, %xmm1 -; XOP-NEXT: vpaddq %xmm2, %xmm5, %xmm2 -; XOP-NEXT: vpaddq %xmm0, %xmm2, %xmm0 +; XOP-NEXT: vpaddq %xmm2, %xmm0, %xmm0 ; XOP-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; XOP-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/mul-constant-result.ll b/llvm/test/CodeGen/X86/mul-constant-result.ll index 1f9e7a93ad0b9..eaffbe892a353 100644 --- a/llvm/test/CodeGen/X86/mul-constant-result.ll +++ b/llvm/test/CodeGen/X86/mul-constant-result.ll @@ -524,18 +524,15 @@ define i32 @mult(i32, i32) local_unnamed_addr #0 { define i32 @foo() local_unnamed_addr #0 { ; X86-LABEL: foo: ; X86: # %bb.0: -; X86-NEXT: pushl %ebp -; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: pushl %ebx -; X86-NEXT: .cfi_def_cfa_offset 12 +; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: pushl %edi -; X86-NEXT: .cfi_def_cfa_offset 16 +; X86-NEXT: .cfi_def_cfa_offset 12 ; X86-NEXT: pushl %esi -; X86-NEXT: .cfi_def_cfa_offset 20 -; X86-NEXT: .cfi_offset %esi, -20 -; X86-NEXT: .cfi_offset %edi, -16 -; X86-NEXT: .cfi_offset %ebx, -12 -; X86-NEXT: .cfi_offset %ebp, -8 +; X86-NEXT: .cfi_def_cfa_offset 16 +; X86-NEXT: .cfi_offset %esi, -16 +; X86-NEXT: .cfi_offset %edi, -12 +; X86-NEXT: .cfi_offset %ebx, -8 ; X86-NEXT: pushl $0 ; X86-NEXT: .cfi_adjust_cfa_offset 4 ; X86-NEXT: pushl $1 @@ -543,8 +540,8 @@ define i32 @foo() local_unnamed_addr #0 { ; X86-NEXT: calll mult@PLT ; X86-NEXT: addl $8, %esp ; X86-NEXT: .cfi_adjust_cfa_offset -8 -; X86-NEXT: movl %eax, %esi -; X86-NEXT: xorl $1, %esi +; X86-NEXT: movl %eax, %edi +; X86-NEXT: xorl $1, %edi ; X86-NEXT: pushl $1 ; X86-NEXT: .cfi_adjust_cfa_offset 4 ; X86-NEXT: pushl $2 @@ -552,9 +549,9 @@ define i32 @foo() local_unnamed_addr #0 { ; X86-NEXT: calll mult@PLT ; X86-NEXT: addl $8, %esp ; X86-NEXT: .cfi_adjust_cfa_offset -8 -; X86-NEXT: movl %eax, %edi -; X86-NEXT: xorl $2, %edi -; X86-NEXT: orl %esi, %edi +; X86-NEXT: movl %eax, %esi +; X86-NEXT: xorl $2, %esi +; X86-NEXT: orl %edi, %esi ; X86-NEXT: pushl $1 ; X86-NEXT: .cfi_adjust_cfa_offset 4 ; X86-NEXT: pushl $3 @@ -562,8 +559,9 @@ define i32 @foo() local_unnamed_addr #0 { ; X86-NEXT: calll mult@PLT ; X86-NEXT: addl $8, %esp ; X86-NEXT: .cfi_adjust_cfa_offset 
-8 -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: xorl $3, %ebx +; X86-NEXT: movl %eax, %edi +; X86-NEXT: xorl $3, %edi +; X86-NEXT: orl %esi, %edi ; X86-NEXT: pushl $2 ; X86-NEXT: .cfi_adjust_cfa_offset 4 ; X86-NEXT: pushl $4 @@ -573,7 +571,6 @@ define i32 @foo() local_unnamed_addr #0 { ; X86-NEXT: .cfi_adjust_cfa_offset -8 ; X86-NEXT: movl %eax, %esi ; X86-NEXT: xorl $4, %esi -; X86-NEXT: orl %ebx, %esi ; X86-NEXT: orl %edi, %esi ; X86-NEXT: pushl $2 ; X86-NEXT: .cfi_adjust_cfa_offset 4 @@ -584,6 +581,7 @@ define i32 @foo() local_unnamed_addr #0 { ; X86-NEXT: .cfi_adjust_cfa_offset -8 ; X86-NEXT: movl %eax, %edi ; X86-NEXT: xorl $5, %edi +; X86-NEXT: orl %esi, %edi ; X86-NEXT: pushl $3 ; X86-NEXT: .cfi_adjust_cfa_offset 4 ; X86-NEXT: pushl $6 @@ -591,9 +589,9 @@ define i32 @foo() local_unnamed_addr #0 { ; X86-NEXT: calll mult@PLT ; X86-NEXT: addl $8, %esp ; X86-NEXT: .cfi_adjust_cfa_offset -8 -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: xorl $6, %ebx -; X86-NEXT: orl %edi, %ebx +; X86-NEXT: movl %eax, %esi +; X86-NEXT: xorl $6, %esi +; X86-NEXT: orl %edi, %esi ; X86-NEXT: pushl $3 ; X86-NEXT: .cfi_adjust_cfa_offset 4 ; X86-NEXT: pushl $7 @@ -603,7 +601,6 @@ define i32 @foo() local_unnamed_addr #0 { ; X86-NEXT: .cfi_adjust_cfa_offset -8 ; X86-NEXT: movl %eax, %edi ; X86-NEXT: xorl $7, %edi -; X86-NEXT: orl %ebx, %edi ; X86-NEXT: orl %esi, %edi ; X86-NEXT: pushl $4 ; X86-NEXT: .cfi_adjust_cfa_offset 4 @@ -612,8 +609,9 @@ define i32 @foo() local_unnamed_addr #0 { ; X86-NEXT: calll mult@PLT ; X86-NEXT: addl $8, %esp ; X86-NEXT: .cfi_adjust_cfa_offset -8 -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: xorl $8, %ebx +; X86-NEXT: movl %eax, %esi +; X86-NEXT: xorl $8, %esi +; X86-NEXT: orl %edi, %esi ; X86-NEXT: pushl $4 ; X86-NEXT: .cfi_adjust_cfa_offset 4 ; X86-NEXT: pushl $9 @@ -621,9 +619,9 @@ define i32 @foo() local_unnamed_addr #0 { ; X86-NEXT: calll mult@PLT ; X86-NEXT: addl $8, %esp ; X86-NEXT: .cfi_adjust_cfa_offset -8 -; X86-NEXT: movl %eax, %esi -; X86-NEXT: xorl $9, %esi -; X86-NEXT: orl %ebx, %esi +; X86-NEXT: movl %eax, %edi +; X86-NEXT: xorl $9, %edi +; X86-NEXT: orl %esi, %edi ; X86-NEXT: pushl $5 ; X86-NEXT: .cfi_adjust_cfa_offset 4 ; X86-NEXT: pushl $10 @@ -631,9 +629,9 @@ define i32 @foo() local_unnamed_addr #0 { ; X86-NEXT: calll mult@PLT ; X86-NEXT: addl $8, %esp ; X86-NEXT: .cfi_adjust_cfa_offset -8 -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: xorl $10, %ebx -; X86-NEXT: orl %esi, %ebx +; X86-NEXT: movl %eax, %esi +; X86-NEXT: xorl $10, %esi +; X86-NEXT: orl %edi, %esi ; X86-NEXT: pushl $5 ; X86-NEXT: .cfi_adjust_cfa_offset 4 ; X86-NEXT: pushl $11 @@ -641,10 +639,9 @@ define i32 @foo() local_unnamed_addr #0 { ; X86-NEXT: calll mult@PLT ; X86-NEXT: addl $8, %esp ; X86-NEXT: .cfi_adjust_cfa_offset -8 -; X86-NEXT: movl %eax, %esi -; X86-NEXT: xorl $11, %esi -; X86-NEXT: orl %ebx, %esi -; X86-NEXT: orl %edi, %esi +; X86-NEXT: movl %eax, %edi +; X86-NEXT: xorl $11, %edi +; X86-NEXT: orl %esi, %edi ; X86-NEXT: pushl $6 ; X86-NEXT: .cfi_adjust_cfa_offset 4 ; X86-NEXT: pushl $12 @@ -652,8 +649,9 @@ define i32 @foo() local_unnamed_addr #0 { ; X86-NEXT: calll mult@PLT ; X86-NEXT: addl $8, %esp ; X86-NEXT: .cfi_adjust_cfa_offset -8 -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: xorl $12, %ebx +; X86-NEXT: movl %eax, %esi +; X86-NEXT: xorl $12, %esi +; X86-NEXT: orl %edi, %esi ; X86-NEXT: pushl $6 ; X86-NEXT: .cfi_adjust_cfa_offset 4 ; X86-NEXT: pushl $13 @@ -663,7 +661,7 @@ define i32 @foo() local_unnamed_addr #0 { ; X86-NEXT: .cfi_adjust_cfa_offset -8 ; X86-NEXT: movl %eax, %edi ; X86-NEXT: xorl $13, %edi 
-; X86-NEXT: orl %ebx, %edi +; X86-NEXT: orl %esi, %edi ; X86-NEXT: pushl $7 ; X86-NEXT: .cfi_adjust_cfa_offset 4 ; X86-NEXT: pushl $14 @@ -671,9 +669,9 @@ define i32 @foo() local_unnamed_addr #0 { ; X86-NEXT: calll mult@PLT ; X86-NEXT: addl $8, %esp ; X86-NEXT: .cfi_adjust_cfa_offset -8 -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: xorl $14, %ebx -; X86-NEXT: orl %edi, %ebx +; X86-NEXT: movl %eax, %esi +; X86-NEXT: xorl $14, %esi +; X86-NEXT: orl %edi, %esi ; X86-NEXT: pushl $7 ; X86-NEXT: .cfi_adjust_cfa_offset 4 ; X86-NEXT: pushl $15 @@ -681,9 +679,9 @@ define i32 @foo() local_unnamed_addr #0 { ; X86-NEXT: calll mult@PLT ; X86-NEXT: addl $8, %esp ; X86-NEXT: .cfi_adjust_cfa_offset -8 -; X86-NEXT: movl %eax, %ebp -; X86-NEXT: xorl $15, %ebp -; X86-NEXT: orl %ebx, %ebp +; X86-NEXT: movl %eax, %edi +; X86-NEXT: xorl $15, %edi +; X86-NEXT: orl %esi, %edi ; X86-NEXT: pushl $8 ; X86-NEXT: .cfi_adjust_cfa_offset 4 ; X86-NEXT: pushl $16 @@ -691,10 +689,9 @@ define i32 @foo() local_unnamed_addr #0 { ; X86-NEXT: calll mult@PLT ; X86-NEXT: addl $8, %esp ; X86-NEXT: .cfi_adjust_cfa_offset -8 -; X86-NEXT: movl %eax, %edi -; X86-NEXT: xorl $16, %edi -; X86-NEXT: orl %ebp, %edi -; X86-NEXT: orl %esi, %edi +; X86-NEXT: movl %eax, %esi +; X86-NEXT: xorl $16, %esi +; X86-NEXT: orl %edi, %esi ; X86-NEXT: pushl $8 ; X86-NEXT: .cfi_adjust_cfa_offset 4 ; X86-NEXT: pushl $17 @@ -702,8 +699,9 @@ define i32 @foo() local_unnamed_addr #0 { ; X86-NEXT: calll mult@PLT ; X86-NEXT: addl $8, %esp ; X86-NEXT: .cfi_adjust_cfa_offset -8 -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: xorl $17, %ebx +; X86-NEXT: movl %eax, %edi +; X86-NEXT: xorl $17, %edi +; X86-NEXT: orl %esi, %edi ; X86-NEXT: pushl $9 ; X86-NEXT: .cfi_adjust_cfa_offset 4 ; X86-NEXT: pushl $18 @@ -713,7 +711,7 @@ define i32 @foo() local_unnamed_addr #0 { ; X86-NEXT: .cfi_adjust_cfa_offset -8 ; X86-NEXT: movl %eax, %esi ; X86-NEXT: xorl $18, %esi -; X86-NEXT: orl %ebx, %esi +; X86-NEXT: orl %edi, %esi ; X86-NEXT: pushl $9 ; X86-NEXT: .cfi_adjust_cfa_offset 4 ; X86-NEXT: pushl $19 @@ -721,9 +719,9 @@ define i32 @foo() local_unnamed_addr #0 { ; X86-NEXT: calll mult@PLT ; X86-NEXT: addl $8, %esp ; X86-NEXT: .cfi_adjust_cfa_offset -8 -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: xorl $19, %ebx -; X86-NEXT: orl %esi, %ebx +; X86-NEXT: movl %eax, %edi +; X86-NEXT: xorl $19, %edi +; X86-NEXT: orl %esi, %edi ; X86-NEXT: pushl $10 ; X86-NEXT: .cfi_adjust_cfa_offset 4 ; X86-NEXT: pushl $20 @@ -733,7 +731,7 @@ define i32 @foo() local_unnamed_addr #0 { ; X86-NEXT: .cfi_adjust_cfa_offset -8 ; X86-NEXT: movl %eax, %esi ; X86-NEXT: xorl $20, %esi -; X86-NEXT: orl %ebx, %esi +; X86-NEXT: orl %edi, %esi ; X86-NEXT: pushl $10 ; X86-NEXT: .cfi_adjust_cfa_offset 4 ; X86-NEXT: pushl $21 @@ -741,9 +739,9 @@ define i32 @foo() local_unnamed_addr #0 { ; X86-NEXT: calll mult@PLT ; X86-NEXT: addl $8, %esp ; X86-NEXT: .cfi_adjust_cfa_offset -8 -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: xorl $21, %ebx -; X86-NEXT: orl %esi, %ebx +; X86-NEXT: movl %eax, %edi +; X86-NEXT: xorl $21, %edi +; X86-NEXT: orl %esi, %edi ; X86-NEXT: pushl $11 ; X86-NEXT: .cfi_adjust_cfa_offset 4 ; X86-NEXT: pushl $22 @@ -753,7 +751,6 @@ define i32 @foo() local_unnamed_addr #0 { ; X86-NEXT: .cfi_adjust_cfa_offset -8 ; X86-NEXT: movl %eax, %esi ; X86-NEXT: xorl $22, %esi -; X86-NEXT: orl %ebx, %esi ; X86-NEXT: orl %edi, %esi ; X86-NEXT: pushl $11 ; X86-NEXT: .cfi_adjust_cfa_offset 4 @@ -762,8 +759,9 @@ define i32 @foo() local_unnamed_addr #0 { ; X86-NEXT: calll mult@PLT ; X86-NEXT: addl $8, %esp ; X86-NEXT: 
.cfi_adjust_cfa_offset -8 -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: xorl $23, %ebx +; X86-NEXT: movl %eax, %edi +; X86-NEXT: xorl $23, %edi +; X86-NEXT: orl %esi, %edi ; X86-NEXT: pushl $12 ; X86-NEXT: .cfi_adjust_cfa_offset 4 ; X86-NEXT: pushl $24 @@ -771,9 +769,9 @@ define i32 @foo() local_unnamed_addr #0 { ; X86-NEXT: calll mult@PLT ; X86-NEXT: addl $8, %esp ; X86-NEXT: .cfi_adjust_cfa_offset -8 -; X86-NEXT: movl %eax, %edi -; X86-NEXT: xorl $24, %edi -; X86-NEXT: orl %ebx, %edi +; X86-NEXT: movl %eax, %esi +; X86-NEXT: xorl $24, %esi +; X86-NEXT: orl %edi, %esi ; X86-NEXT: pushl $12 ; X86-NEXT: .cfi_adjust_cfa_offset 4 ; X86-NEXT: pushl $25 @@ -781,9 +779,9 @@ define i32 @foo() local_unnamed_addr #0 { ; X86-NEXT: calll mult@PLT ; X86-NEXT: addl $8, %esp ; X86-NEXT: .cfi_adjust_cfa_offset -8 -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: xorl $25, %ebx -; X86-NEXT: orl %edi, %ebx +; X86-NEXT: movl %eax, %edi +; X86-NEXT: xorl $25, %edi +; X86-NEXT: orl %esi, %edi ; X86-NEXT: pushl $13 ; X86-NEXT: .cfi_adjust_cfa_offset 4 ; X86-NEXT: pushl $26 @@ -791,9 +789,9 @@ define i32 @foo() local_unnamed_addr #0 { ; X86-NEXT: calll mult@PLT ; X86-NEXT: addl $8, %esp ; X86-NEXT: .cfi_adjust_cfa_offset -8 -; X86-NEXT: movl %eax, %edi -; X86-NEXT: xorl $26, %edi -; X86-NEXT: orl %ebx, %edi +; X86-NEXT: movl %eax, %esi +; X86-NEXT: xorl $26, %esi +; X86-NEXT: orl %edi, %esi ; X86-NEXT: pushl $13 ; X86-NEXT: .cfi_adjust_cfa_offset 4 ; X86-NEXT: pushl $27 @@ -801,9 +799,9 @@ define i32 @foo() local_unnamed_addr #0 { ; X86-NEXT: calll mult@PLT ; X86-NEXT: addl $8, %esp ; X86-NEXT: .cfi_adjust_cfa_offset -8 -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: xorl $27, %ebx -; X86-NEXT: orl %edi, %ebx +; X86-NEXT: movl %eax, %edi +; X86-NEXT: xorl $27, %edi +; X86-NEXT: orl %esi, %edi ; X86-NEXT: pushl $14 ; X86-NEXT: .cfi_adjust_cfa_offset 4 ; X86-NEXT: pushl $28 @@ -811,9 +809,9 @@ define i32 @foo() local_unnamed_addr #0 { ; X86-NEXT: calll mult@PLT ; X86-NEXT: addl $8, %esp ; X86-NEXT: .cfi_adjust_cfa_offset -8 -; X86-NEXT: movl %eax, %ebp -; X86-NEXT: xorl $28, %ebp -; X86-NEXT: orl %ebx, %ebp +; X86-NEXT: movl %eax, %esi +; X86-NEXT: xorl $28, %esi +; X86-NEXT: orl %edi, %esi ; X86-NEXT: pushl $14 ; X86-NEXT: .cfi_adjust_cfa_offset 4 ; X86-NEXT: pushl $29 @@ -823,7 +821,6 @@ define i32 @foo() local_unnamed_addr #0 { ; X86-NEXT: .cfi_adjust_cfa_offset -8 ; X86-NEXT: movl %eax, %edi ; X86-NEXT: xorl $29, %edi -; X86-NEXT: orl %ebp, %edi ; X86-NEXT: orl %esi, %edi ; X86-NEXT: pushl $15 ; X86-NEXT: .cfi_adjust_cfa_offset 4 @@ -834,6 +831,7 @@ define i32 @foo() local_unnamed_addr #0 { ; X86-NEXT: .cfi_adjust_cfa_offset -8 ; X86-NEXT: movl %eax, %ebx ; X86-NEXT: xorl $30, %ebx +; X86-NEXT: orl %edi, %ebx ; X86-NEXT: pushl $15 ; X86-NEXT: .cfi_adjust_cfa_offset 4 ; X86-NEXT: pushl $31 @@ -844,7 +842,6 @@ define i32 @foo() local_unnamed_addr #0 { ; X86-NEXT: movl %eax, %esi ; X86-NEXT: xorl $31, %esi ; X86-NEXT: orl %ebx, %esi -; X86-NEXT: orl %edi, %esi ; X86-NEXT: pushl $16 ; X86-NEXT: .cfi_adjust_cfa_offset 4 ; X86-NEXT: pushl $32 @@ -859,12 +856,10 @@ define i32 @foo() local_unnamed_addr #0 { ; X86-NEXT: negl %ecx ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: popl %esi -; X86-NEXT: .cfi_def_cfa_offset 16 -; X86-NEXT: popl %edi ; X86-NEXT: .cfi_def_cfa_offset 12 -; X86-NEXT: popl %ebx +; X86-NEXT: popl %edi ; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: popl %ebp +; X86-NEXT: popl %ebx ; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; diff --git a/llvm/test/CodeGen/X86/mul-i512.ll b/llvm/test/CodeGen/X86/mul-i512.ll index 
64f6746e616ed..055432e486148 100644 --- a/llvm/test/CodeGen/X86/mul-i512.ll +++ b/llvm/test/CodeGen/X86/mul-i512.ll @@ -186,17 +186,17 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind { ; X86-NEXT: movzbl %cl, %eax ; X86-NEXT: adcl %eax, %edx ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl (%esi), %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl (%ecx), %ebx ; X86-NEXT: movl %ebx, %eax ; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: mull %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: mull %esi ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %edx, %edi -; X86-NEXT: movl 4(%esi), %eax +; X86-NEXT: movl 4(%ecx), %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: mull %ecx +; X86-NEXT: mull %esi ; X86-NEXT: movl %edx, %ecx ; X86-NEXT: movl %eax, %esi ; X86-NEXT: addl %edi, %esi @@ -800,16 +800,16 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind { ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl 60(%edi), %esi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: imull %eax, %esi ; X86-NEXT: movl 56(%edi), %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X86-NEXT: movl %ebp, %eax +; X86-NEXT: movl %eax, %ebp ; X86-NEXT: mull %ecx ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: addl %esi, %edx ; X86-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X86-NEXT: addl %edx, %ecx -; X86-NEXT: movl 60(%edi), %eax -; X86-NEXT: imull %ebp, %eax -; X86-NEXT: addl %eax, %ecx ; X86-NEXT: movl 48(%edi), %esi ; X86-NEXT: movl 52(%edi), %edi ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload @@ -837,15 +837,15 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind { ; X86-NEXT: addl %ebp, %ebx ; X86-NEXT: adcl $0, %ecx ; X86-NEXT: movl %esi, %eax -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT: mull %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X86-NEXT: mull %ebp ; X86-NEXT: movl %edx, %esi -; X86-NEXT: movl %eax, %ebp -; X86-NEXT: addl %ebx, %ebp +; X86-NEXT: movl %eax, %edi +; X86-NEXT: addl %ebx, %edi ; X86-NEXT: adcl %ecx, %esi ; X86-NEXT: setb %cl ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: mull %edi +; X86-NEXT: mull %ebp ; X86-NEXT: addl %esi, %eax ; X86-NEXT: movzbl %cl, %ecx ; X86-NEXT: adcl %ecx, %edx @@ -853,13 +853,13 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind { ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X86-NEXT: adcl (%esp), %eax # 4-byte Folded Reload ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X86-NEXT: movl 
%ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/X86/mul64.ll b/llvm/test/CodeGen/X86/mul64.ll index 25d10b06402d2..fb464a7b0bd30 100644 --- a/llvm/test/CodeGen/X86/mul64.ll +++ b/llvm/test/CodeGen/X86/mul64.ll @@ -11,9 +11,9 @@ define i64 @foo(i64 %t, i64 %u) nounwind { ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: mull %esi ; X86-NEXT: imull {{[0-9]+}}(%esp), %ecx -; X86-NEXT: addl %ecx, %edx ; X86-NEXT: imull {{[0-9]+}}(%esp), %esi ; X86-NEXT: addl %esi, %edx +; X86-NEXT: addl %ecx, %edx ; X86-NEXT: popl %esi ; X86-NEXT: retl ; diff --git a/llvm/test/CodeGen/X86/pr62653.ll b/llvm/test/CodeGen/X86/pr62653.ll index b6a1bf47983dc..54a38981b6d40 100644 --- a/llvm/test/CodeGen/X86/pr62653.ll +++ b/llvm/test/CodeGen/X86/pr62653.ll @@ -26,39 +26,39 @@ define <64 x i4> @pr62653(<64 x i4> %a0) nounwind { ; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d ; CHECK-NEXT: andl $15, %r10d ; CHECK-NEXT: shll $20, %r10d -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d -; CHECK-NEXT: andl $15, %r11d -; CHECK-NEXT: shll $24, %r11d -; CHECK-NEXT: orq %r10, %r11 +; CHECK-NEXT: orq %rdi, %r10 +; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; CHECK-NEXT: andl $15, %edi +; CHECK-NEXT: shll $24, %edi +; CHECK-NEXT: orq %r10, %rdi ; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d ; CHECK-NEXT: shll $28, %r10d -; CHECK-NEXT: orq %r11, %r10 -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d -; CHECK-NEXT: andl $15, %r11d -; CHECK-NEXT: shlq $32, %r11 -; CHECK-NEXT: orq %r10, %r11 +; CHECK-NEXT: orq %rdi, %r10 +; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; CHECK-NEXT: andl $15, %edi +; CHECK-NEXT: shlq $32, %rdi +; CHECK-NEXT: orq %r10, %rdi ; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d ; CHECK-NEXT: andl $15, %r10d ; CHECK-NEXT: shlq $36, %r10 -; CHECK-NEXT: orq %r11, %r10 ; CHECK-NEXT: orq %rdi, %r10 ; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edi ; CHECK-NEXT: andl $15, %edi ; CHECK-NEXT: shlq $40, %rdi -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d -; CHECK-NEXT: andl $15, %r11d -; CHECK-NEXT: shlq $44, %r11 -; CHECK-NEXT: orq %rdi, %r11 +; CHECK-NEXT: orq %r10, %rdi +; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d +; CHECK-NEXT: andl $15, %r10d +; CHECK-NEXT: shlq $44, %r10 +; CHECK-NEXT: orq %rdi, %r10 ; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edi ; CHECK-NEXT: andl $15, %edi ; CHECK-NEXT: shlq $48, %rdi -; CHECK-NEXT: orq %r11, %rdi -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d -; CHECK-NEXT: andl $15, %r11d -; CHECK-NEXT: shlq $52, %r11 -; CHECK-NEXT: orq %rdi, %r11 -; CHECK-NEXT: orq %r10, %r11 -; CHECK-NEXT: movq %r11, 8(%rax) +; CHECK-NEXT: orq %r10, %rdi +; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d +; CHECK-NEXT: andl $15, %r10d +; CHECK-NEXT: shlq $52, %r10 +; CHECK-NEXT: orq %rdi, %r10 +; CHECK-NEXT: movq %r10, 8(%rax) ; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edi ; CHECK-NEXT: andl $15, %edi ; CHECK-NEXT: shlq $32, %rdi @@ -82,39 +82,39 @@ define <64 x i4> @pr62653(<64 x i4> %a0) nounwind { ; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edx ; CHECK-NEXT: andl $15, %edx ; CHECK-NEXT: shll $24, %edx -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %esi -; CHECK-NEXT: shll $28, %esi -; CHECK-NEXT: orl %edx, %esi -; CHECK-NEXT: orl %ecx, %esi -; CHECK-NEXT: orq %rdi, %rsi +; CHECK-NEXT: orl 
%ecx, %edx ; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx -; CHECK-NEXT: andl $15, %ecx -; CHECK-NEXT: shlq $36, %rcx -; CHECK-NEXT: orq %rsi, %rcx +; CHECK-NEXT: shll $28, %ecx +; CHECK-NEXT: orl %edx, %ecx +; CHECK-NEXT: orq %rdi, %rcx ; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edx ; CHECK-NEXT: andl $15, %edx -; CHECK-NEXT: shlq $40, %rdx +; CHECK-NEXT: shlq $36, %rdx ; CHECK-NEXT: orq %rcx, %rdx ; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx ; CHECK-NEXT: andl $15, %ecx -; CHECK-NEXT: shlq $44, %rcx +; CHECK-NEXT: shlq $40, %rcx ; CHECK-NEXT: orq %rdx, %rcx ; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edx ; CHECK-NEXT: andl $15, %edx -; CHECK-NEXT: shlq $48, %rdx -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %esi -; CHECK-NEXT: andl $15, %esi -; CHECK-NEXT: shlq $52, %rsi -; CHECK-NEXT: orq %rdx, %rsi +; CHECK-NEXT: shlq $44, %rdx +; CHECK-NEXT: orq %rcx, %rdx +; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx +; CHECK-NEXT: andl $15, %ecx +; CHECK-NEXT: shlq $48, %rcx +; CHECK-NEXT: orq %rdx, %rcx ; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edx ; CHECK-NEXT: andl $15, %edx -; CHECK-NEXT: shlq $56, %rdx -; CHECK-NEXT: orq %rsi, %rdx -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %esi -; CHECK-NEXT: shlq $60, %rsi -; CHECK-NEXT: orq %rdx, %rsi -; CHECK-NEXT: orq %rcx, %rsi -; CHECK-NEXT: movq %rsi, (%rax) +; CHECK-NEXT: shlq $52, %rdx +; CHECK-NEXT: orq %rcx, %rdx +; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx +; CHECK-NEXT: andl $15, %ecx +; CHECK-NEXT: shlq $56, %rcx +; CHECK-NEXT: orq %rdx, %rcx +; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edx +; CHECK-NEXT: shlq $60, %rdx +; CHECK-NEXT: orq %rcx, %rdx +; CHECK-NEXT: movq %rdx, (%rax) ; CHECK-NEXT: retq %res = shufflevector <64 x i4> %a0, <64 x i4> zeroinitializer, <64 x i32> ret <64 x i4> %res diff --git a/llvm/test/CodeGen/X86/rotate-multi.ll b/llvm/test/CodeGen/X86/rotate-multi.ll index 8b4c852fd7ef7..4d3b70090d77e 100644 --- a/llvm/test/CodeGen/X86/rotate-multi.ll +++ b/llvm/test/CodeGen/X86/rotate-multi.ll @@ -79,27 +79,27 @@ define i32 @f3(i32 %a0) #0 { ; CHECK-NEXT: orl %eax, %ecx ; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: shll $7, %eax +; CHECK-NEXT: orl %ecx, %eax +; CHECK-NEXT: movl %edi, %ecx +; CHECK-NEXT: shll $13, %ecx ; CHECK-NEXT: movl %edi, %edx -; CHECK-NEXT: shll $13, %edx -; CHECK-NEXT: orl %eax, %edx +; CHECK-NEXT: shll $19, %edx ; CHECK-NEXT: orl %ecx, %edx -; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: shll $19, %eax ; CHECK-NEXT: movl %edi, %ecx ; CHECK-NEXT: shrl $2, %ecx +; CHECK-NEXT: orl %edx, %ecx ; CHECK-NEXT: orl %eax, %ecx -; CHECK-NEXT: movl %edi, %esi -; CHECK-NEXT: shrl $15, %esi -; CHECK-NEXT: orl %ecx, %esi -; CHECK-NEXT: orl %edx, %esi -; CHECK-NEXT: movl %edi, %ecx -; CHECK-NEXT: shrl $23, %ecx +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shrl $15, %eax +; CHECK-NEXT: movl %edi, %edx +; CHECK-NEXT: shrl $23, %edx +; CHECK-NEXT: orl %eax, %edx ; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: shrl $25, %eax -; CHECK-NEXT: orl %ecx, %eax +; CHECK-NEXT: orl %edx, %eax ; CHECK-NEXT: shrl $30, %edi ; CHECK-NEXT: orl %edi, %eax -; CHECK-NEXT: orl %esi, %eax +; CHECK-NEXT: orl %ecx, %eax ; CHECK-NEXT: retq b0: %v0 = shl i32 %a0, 3 diff --git a/llvm/test/CodeGen/X86/sad.ll b/llvm/test/CodeGen/X86/sad.ll index 245516974d15b..8e51e56b69e7c 100644 --- a/llvm/test/CodeGen/X86/sad.ll +++ b/llvm/test/CodeGen/X86/sad.ll @@ -198,15 +198,15 @@ define dso_local i32 @sad_32i8() nounwind { ; AVX1-NEXT: addq $32, %rax ; AVX1-NEXT: jne .LBB1_1 ; AVX1-NEXT: # %bb.2: # %middle.block -; AVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm2 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 -; AVX1-NEXT: vpaddd %xmm4, %xmm4, %xmm5 -; AVX1-NEXT: vpaddd %xmm5, %xmm4, %xmm4 -; AVX1-NEXT: vpaddd %xmm4, %xmm3, %xmm3 -; AVX1-NEXT: vpaddd %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpaddd %xmm0, %xmm1, %xmm2 +; AVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm3 +; AVX1-NEXT: vpaddd %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpaddd %xmm0, %xmm1, %xmm1 +; AVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; AVX1-NEXT: vpaddd %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: vpaddd %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vpaddd %xmm0, %xmm2, %xmm0 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] @@ -357,8 +357,8 @@ define dso_local i32 @sad_avx64i8() nounwind { ; AVX1: # %bb.0: # %entry ; AVX1-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; AVX1-NEXT: movq $-1024, %rax # imm = 0xFC00 -; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX1-NEXT: .p2align 4 ; AVX1-NEXT: .LBB2_1: # %vector.body ; AVX1-NEXT: # =>This Inner Loop Header: Depth=1 @@ -370,32 +370,33 @@ define dso_local i32 @sad_avx64i8() nounwind { ; AVX1-NEXT: vpsadbw b+1056(%rax), %xmm5, %xmm5 ; AVX1-NEXT: vmovdqa a+1072(%rax), %xmm6 ; AVX1-NEXT: vpsadbw b+1072(%rax), %xmm6, %xmm6 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm7 +; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm7 ; AVX1-NEXT: vpaddd %xmm7, %xmm6, %xmm6 -; AVX1-NEXT: vpaddd %xmm1, %xmm5, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm1, %ymm1 -; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5 +; AVX1-NEXT: vpaddd %xmm2, %xmm5, %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm2, %ymm2 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5 ; AVX1-NEXT: vpaddd %xmm5, %xmm4, %xmm4 -; AVX1-NEXT: vpaddd %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2 +; AVX1-NEXT: vpaddd %xmm1, %xmm3, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm1, %ymm1 ; AVX1-NEXT: addq $64, %rax ; AVX1-NEXT: jne .LBB2_1 ; AVX1-NEXT: # %bb.2: # %middle.block -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 +; AVX1-NEXT: vpaddd %xmm4, %xmm3, %xmm3 ; AVX1-NEXT: vpaddd %xmm4, %xmm4, %xmm5 -; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm6 -; AVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm7 -; AVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm8 -; AVX1-NEXT: vpaddd %xmm0, %xmm8, %xmm8 -; AVX1-NEXT: vpaddd %xmm2, %xmm8, %xmm2 -; AVX1-NEXT: vpaddd %xmm7, %xmm0, %xmm0 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm6 +; AVX1-NEXT: vpaddd %xmm4, %xmm6, %xmm4 +; AVX1-NEXT: vpaddd %xmm0, %xmm2, %xmm2 +; AVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm6 +; AVX1-NEXT: vpaddd %xmm6, %xmm2, %xmm2 +; AVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm6 +; AVX1-NEXT: vpaddd %xmm6, %xmm0, %xmm0 ; AVX1-NEXT: vpaddd %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: vpaddd %xmm0, %xmm2, %xmm0 +; AVX1-NEXT: vpaddd %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpaddd %xmm5, %xmm4, %xmm1 -; AVX1-NEXT: vpaddd %xmm1, %xmm6, %xmm2 -; AVX1-NEXT: vpaddd %xmm1, %xmm3, %xmm1 -; AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vpaddd %xmm5, %xmm3, %xmm2 +; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/sext-vsetcc.ll b/llvm/test/CodeGen/X86/sext-vsetcc.ll index 0990c0b12f79a..f27124d6d76d0 100644 --- a/llvm/test/CodeGen/X86/sext-vsetcc.ll +++ b/llvm/test/CodeGen/X86/sext-vsetcc.ll 
@@ -571,50 +571,51 @@ define <8 x i32> @PR63946(<8 x i32> %a0, <8 x i32> %b0) nounwind { ; SSE-LABEL: PR63946: ; SSE: # %bb.0: # %entry ; SSE-NEXT: movdqa %xmm1, %xmm4 -; SSE-NEXT: movdqa %xmm0, %xmm13 +; SSE-NEXT: movdqa %xmm0, %xmm11 ; SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm2[1,2,3,0] ; SSE-NEXT: pshufd {{.*#+}} xmm9 = xmm3[1,2,3,0] ; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm3[2,3,0,1] ; SSE-NEXT: pshufd {{.*#+}} xmm8 = xmm2[2,3,0,1] ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm3[3,0,1,2] -; SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm2[3,0,1,2] -; SSE-NEXT: pcmpeqd %xmm2, %xmm0 -; SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE-NEXT: movdqa %xmm9, %xmm11 -; SSE-NEXT: pcmpeqd %xmm4, %xmm11 -; SSE-NEXT: movdqa %xmm7, %xmm12 -; SSE-NEXT: movdqa %xmm8, %xmm10 -; SSE-NEXT: movdqa %xmm5, %xmm15 -; SSE-NEXT: pcmpeqd %xmm4, %xmm15 -; SSE-NEXT: movdqa %xmm1, %xmm14 +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[3,0,1,2] +; SSE-NEXT: movdqa %xmm11, %xmm6 +; SSE-NEXT: pcmpeqd %xmm2, %xmm6 +; SSE-NEXT: movdqa %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE-NEXT: movdqa %xmm9, %xmm10 +; SSE-NEXT: pcmpeqd %xmm4, %xmm10 +; SSE-NEXT: movdqa %xmm7, %xmm13 +; SSE-NEXT: movdqa %xmm8, %xmm12 +; SSE-NEXT: movdqa %xmm5, %xmm14 ; SSE-NEXT: pcmpeqd %xmm4, %xmm14 +; SSE-NEXT: movdqa %xmm1, %xmm15 +; SSE-NEXT: pcmpeqd %xmm4, %xmm15 ; SSE-NEXT: pcmpeqd %xmm4, %xmm2 ; SSE-NEXT: pcmpeqd %xmm4, %xmm7 ; SSE-NEXT: pcmpeqd %xmm4, %xmm8 -; SSE-NEXT: movdqa %xmm6, %xmm0 -; SSE-NEXT: pcmpeqd %xmm4, %xmm6 +; SSE-NEXT: movdqa %xmm0, %xmm6 +; SSE-NEXT: pcmpeqd %xmm4, %xmm0 ; SSE-NEXT: pcmpeqd %xmm3, %xmm4 -; SSE-NEXT: por %xmm4, %xmm11 -; SSE-NEXT: pcmpeqd %xmm13, %xmm12 -; SSE-NEXT: por {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Folded Reload -; SSE-NEXT: pcmpeqd %xmm13, %xmm10 -; SSE-NEXT: pcmpeqd %xmm13, %xmm0 +; SSE-NEXT: por %xmm4, %xmm10 +; SSE-NEXT: pcmpeqd %xmm11, %xmm13 +; SSE-NEXT: por {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Folded Reload +; SSE-NEXT: pcmpeqd %xmm11, %xmm12 +; SSE-NEXT: por %xmm13, %xmm12 +; SSE-NEXT: por %xmm10, %xmm14 +; SSE-NEXT: pcmpeqd %xmm11, %xmm6 ; SSE-NEXT: por %xmm15, %xmm2 -; SSE-NEXT: por %xmm11, %xmm2 -; SSE-NEXT: pcmpeqd %xmm13, %xmm3 -; SSE-NEXT: por %xmm3, %xmm10 -; SSE-NEXT: por %xmm12, %xmm10 -; SSE-NEXT: por %xmm14, %xmm7 -; SSE-NEXT: pcmpeqd %xmm13, %xmm9 -; SSE-NEXT: por %xmm0, %xmm9 -; SSE-NEXT: pcmpeqd %xmm13, %xmm5 +; SSE-NEXT: por %xmm14, %xmm2 +; SSE-NEXT: pcmpeqd %xmm11, %xmm3 +; SSE-NEXT: por %xmm3, %xmm6 +; SSE-NEXT: por %xmm12, %xmm6 +; SSE-NEXT: por %xmm2, %xmm7 +; SSE-NEXT: pcmpeqd %xmm11, %xmm9 +; SSE-NEXT: pcmpeqd %xmm11, %xmm5 ; SSE-NEXT: por %xmm9, %xmm5 -; SSE-NEXT: por %xmm10, %xmm5 +; SSE-NEXT: por %xmm6, %xmm5 ; SSE-NEXT: por %xmm7, %xmm8 -; SSE-NEXT: por %xmm2, %xmm8 ; SSE-NEXT: packssdw %xmm8, %xmm5 -; SSE-NEXT: pcmpeqd %xmm13, %xmm1 -; SSE-NEXT: packssdw %xmm6, %xmm1 +; SSE-NEXT: pcmpeqd %xmm11, %xmm1 +; SSE-NEXT: packssdw %xmm0, %xmm1 ; SSE-NEXT: por %xmm5, %xmm1 ; SSE-NEXT: movdqa %xmm1, %xmm0 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] @@ -638,12 +639,12 @@ define <8 x i32> @PR63946(<8 x i32> %a0, <8 x i32> %b0) nounwind { ; AVX2-NEXT: vpcmpeqd %ymm0, %ymm2, %ymm2 ; AVX2-NEXT: vpor %ymm1, %ymm2, %ymm1 ; AVX2-NEXT: vpcmpeqd %ymm0, %ymm3, %ymm2 -; AVX2-NEXT: vpcmpeqd %ymm0, %ymm4, %ymm3 -; AVX2-NEXT: vpcmpeqd %ymm0, %ymm5, %ymm4 -; AVX2-NEXT: vpor %ymm4, %ymm2, %ymm2 ; AVX2-NEXT: vpor %ymm2, %ymm1, %ymm1 -; AVX2-NEXT: vpcmpeqd %ymm0, %ymm6, %ymm2 +; AVX2-NEXT: vpcmpeqd %ymm0, %ymm4, %ymm2 +; AVX2-NEXT: vpcmpeqd %ymm0, %ymm5, %ymm3 ; 
AVX2-NEXT: vpor %ymm2, %ymm3, %ymm2 +; AVX2-NEXT: vpcmpeqd %ymm0, %ymm6, %ymm3 +; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2 ; AVX2-NEXT: vpor %ymm2, %ymm1, %ymm1 ; AVX2-NEXT: vpcmpeqd %ymm0, %ymm7, %ymm2 ; AVX2-NEXT: vpcmpeqd %ymm0, %ymm8, %ymm0 diff --git a/llvm/test/CodeGen/X86/smul_fix.ll b/llvm/test/CodeGen/X86/smul_fix.ll index ce56283df6010..3c93a1206dba6 100644 --- a/llvm/test/CodeGen/X86/smul_fix.ll +++ b/llvm/test/CodeGen/X86/smul_fix.ll @@ -231,9 +231,9 @@ define i64 @func5(i64 %x, i64 %y) { ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: mull %esi ; X86-NEXT: imull {{[0-9]+}}(%esp), %ecx -; X86-NEXT: addl %ecx, %edx ; X86-NEXT: imull {{[0-9]+}}(%esp), %esi ; X86-NEXT: addl %esi, %edx +; X86-NEXT: addl %ecx, %edx ; X86-NEXT: popl %esi ; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl diff --git a/llvm/test/CodeGen/X86/statepoint-live-in.ll b/llvm/test/CodeGen/X86/statepoint-live-in.ll index 787a33aa49b20..ce9ee2a2e522f 100644 --- a/llvm/test/CodeGen/X86/statepoint-live-in.ll +++ b/llvm/test/CodeGen/X86/statepoint-live-in.ll @@ -446,25 +446,25 @@ define i64 @test11(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 % ; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: movl %edx, %r14d +; CHECK-NEXT: movl %edx, %r12d ; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: movl %r8d, %r15d +; CHECK-NEXT: movl %r8d, %r13d ; CHECK-NEXT: movl %r9d, %eax ; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r13d +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %ebp ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax ; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax ; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax ; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %ebp +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r14d ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax ; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax ; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r12d +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r15d ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax ; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %ebx @@ -474,36 +474,34 @@ define i64 @test11(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 % ; CHECK-NEXT: Ltmp13: ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Reload ; CHECK-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Folded Reload -; CHECK-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r14 ## 8-byte Folded Reload -; CHECK-NEXT: addq %rax, %r14 -; CHECK-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r15 ## 8-byte Folded Reload -; CHECK-NEXT: addq %r14, %r15 +; CHECK-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r12 ## 8-byte Folded Reload +; CHECK-NEXT: addq %rax, %r12 +; CHECK-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r13 ## 8-byte Folded Reload +; CHECK-NEXT: addq %r12, %r13 ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: addq %rax, %r15 +; CHECK-NEXT: addq %rax, %r13 ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax ; CHECK-NEXT: addq %rax, %rbx -; CHECK-NEXT: addq %r15, %rbx -; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: addq %rax, %r12 +; 
CHECK-NEXT: addq %r13, %rbx ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: addq %rax, %r12 -; CHECK-NEXT: addq %rbx, %r12 +; CHECK-NEXT: addq %rax, %r15 ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: addq %rax, %rbp +; CHECK-NEXT: addq %rax, %r15 +; CHECK-NEXT: addq %rbx, %r15 ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: addq %rax, %rbp +; CHECK-NEXT: addq %rax, %r14 ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: addq %rax, %rbp -; CHECK-NEXT: addq %r12, %rbp +; CHECK-NEXT: addq %rax, %r14 ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: addq %rax, %r13 +; CHECK-NEXT: addq %rax, %r14 +; CHECK-NEXT: addq %r15, %r14 ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: addq %rax, %r13 +; CHECK-NEXT: addq %rax, %rbp ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: addq %rax, %r13 +; CHECK-NEXT: addq %rax, %rbp ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: addq %rax, %r13 -; CHECK-NEXT: addq %rbp, %r13 +; CHECK-NEXT: addq %rax, %rbp +; CHECK-NEXT: addq %r14, %rbp ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax ; CHECK-NEXT: addq %rax, %rcx @@ -511,10 +509,12 @@ define i64 @test11(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 % ; CHECK-NEXT: addq %rax, %rcx ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax ; CHECK-NEXT: addq %rax, %rcx +; CHECK-NEXT: addq %rbp, %rcx +; CHECK-NEXT: movq %rcx, %rdx ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: addq %rax, %rcx -; CHECK-NEXT: addq %r13, %rcx -; CHECK-NEXT: movq %rcx, %rax +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %ecx +; CHECK-NEXT: addq %rcx, %rax +; CHECK-NEXT: addq %rdx, %rax ; CHECK-NEXT: addq $168, %rsp ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %r12 diff --git a/llvm/test/CodeGen/X86/statepoint-regs.ll b/llvm/test/CodeGen/X86/statepoint-regs.ll index 5c26e29dce45e..dd9e913ec72c7 100644 --- a/llvm/test/CodeGen/X86/statepoint-regs.ll +++ b/llvm/test/CodeGen/X86/statepoint-regs.ll @@ -558,25 +558,25 @@ define i64 @test11(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 % ; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: movl %edx, %r14d +; CHECK-NEXT: movl %edx, %r12d ; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: movl %r8d, %r15d +; CHECK-NEXT: movl %r8d, %r13d ; CHECK-NEXT: movl %r9d, %eax ; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r13d +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %ebp ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax ; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax ; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax ; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %ebp +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r14d ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax ; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax ; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r12d +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r15d ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax ; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %ebx @@ -586,36 +586,34 @@ define i64 
@test11(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 % ; CHECK-NEXT: Ltmp14: ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Reload ; CHECK-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Folded Reload -; CHECK-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r14 ## 8-byte Folded Reload -; CHECK-NEXT: addq %rax, %r14 -; CHECK-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r15 ## 8-byte Folded Reload -; CHECK-NEXT: addq %r14, %r15 +; CHECK-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r12 ## 8-byte Folded Reload +; CHECK-NEXT: addq %rax, %r12 +; CHECK-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r13 ## 8-byte Folded Reload +; CHECK-NEXT: addq %r12, %r13 ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: addq %rax, %r15 +; CHECK-NEXT: addq %rax, %r13 ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax ; CHECK-NEXT: addq %rax, %rbx -; CHECK-NEXT: addq %r15, %rbx +; CHECK-NEXT: addq %r13, %rbx ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: addq %rax, %r12 -; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: addq %rax, %r12 -; CHECK-NEXT: addq %rbx, %r12 +; CHECK-NEXT: addq %rax, %r15 ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: addq %rax, %rbp +; CHECK-NEXT: addq %rax, %r15 +; CHECK-NEXT: addq %rbx, %r15 ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: addq %rax, %rbp +; CHECK-NEXT: addq %rax, %r14 ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: addq %rax, %rbp -; CHECK-NEXT: addq %r12, %rbp +; CHECK-NEXT: addq %rax, %r14 ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: addq %rax, %r13 +; CHECK-NEXT: addq %rax, %r14 +; CHECK-NEXT: addq %r15, %r14 ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: addq %rax, %r13 +; CHECK-NEXT: addq %rax, %rbp ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: addq %rax, %r13 +; CHECK-NEXT: addq %rax, %rbp ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: addq %rax, %r13 -; CHECK-NEXT: addq %rbp, %r13 +; CHECK-NEXT: addq %rax, %rbp +; CHECK-NEXT: addq %r14, %rbp ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax ; CHECK-NEXT: addq %rax, %rcx @@ -623,10 +621,12 @@ define i64 @test11(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 % ; CHECK-NEXT: addq %rax, %rcx ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax ; CHECK-NEXT: addq %rax, %rcx +; CHECK-NEXT: addq %rbp, %rcx +; CHECK-NEXT: movq %rcx, %rdx ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: addq %rax, %rcx -; CHECK-NEXT: addq %r13, %rcx -; CHECK-NEXT: movq %rcx, %rax +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %ecx +; CHECK-NEXT: addq %rcx, %rax +; CHECK-NEXT: addq %rdx, %rax ; CHECK-NEXT: addq $168, %rsp ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %r12 diff --git a/llvm/test/CodeGen/X86/ucmp.ll b/llvm/test/CodeGen/X86/ucmp.ll index cd643cb8d6375..6a5896f5ce348 100644 --- a/llvm/test/CodeGen/X86/ucmp.ll +++ b/llvm/test/CodeGen/X86/ucmp.ll @@ -3401,70 +3401,70 @@ define <17 x i2> @ucmp_uncommon_vectors(<17 x i71> %x, <17 x i71> %y) nounwind { ; X86-NEXT: sbbb $0, %cl ; X86-NEXT: movzbl %cl, %ecx ; X86-NEXT: andl $3, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movb %cl, 4(%edi) -; X86-NEXT: movzbl %bh, %ebp -; X86-NEXT: movzbl %bl, %ecx -; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movb %cl, 4(%esi) +; X86-NEXT: movzbl %bh, %edi +; X86-NEXT: movzbl %bl, %ebp +; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload -; 
X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload +; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload +; X86-NEXT: andl $3, %edi ; X86-NEXT: andl $3, %ebp +; X86-NEXT: leal (%ebp,%edi,4), %edi ; X86-NEXT: andl $3, %ecx -; X86-NEXT: leal (%ecx,%ebp,4), %ecx -; X86-NEXT: andl $3, %eax -; X86-NEXT: shll $4, %eax -; X86-NEXT: orl %ecx, %eax +; X86-NEXT: shll $4, %ecx +; X86-NEXT: orl %edi, %ecx ; X86-NEXT: andl $3, %ebx ; X86-NEXT: shll $6, %ebx -; X86-NEXT: orl %eax, %ebx -; X86-NEXT: andl $3, %esi -; X86-NEXT: shll $8, %esi -; X86-NEXT: orl %ebx, %esi +; X86-NEXT: orl %ecx, %ebx +; X86-NEXT: andl $3, %eax +; X86-NEXT: shll $8, %eax +; X86-NEXT: orl %ebx, %eax ; X86-NEXT: andl $3, %edx ; X86-NEXT: shll $10, %edx -; X86-NEXT: orl %esi, %edx +; X86-NEXT: orl %eax, %edx ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X86-NEXT: andl $3, %eax ; X86-NEXT: shll $12, %eax +; X86-NEXT: orl %edx, %eax ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X86-NEXT: andl $3, %ecx ; X86-NEXT: shll $14, %ecx ; X86-NEXT: orl %eax, %ecx +; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X86-NEXT: andl $3, %eax -; X86-NEXT: shll $16, %eax -; X86-NEXT: orl %ecx, %eax -; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload -; X86-NEXT: andl $3, %esi -; X86-NEXT: shll $18, %esi -; X86-NEXT: orl %eax, %esi -; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X86-NEXT: andl $3, %edx +; X86-NEXT: shll $16, %edx +; X86-NEXT: orl %ecx, %edx ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload ; X86-NEXT: andl $3, %eax -; X86-NEXT: shll $20, %eax -; X86-NEXT: orl %esi, %eax -; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload -; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload +; X86-NEXT: shll $18, %eax ; X86-NEXT: orl %edx, %eax +; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload +; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload ; X86-NEXT: andl $3, %ecx -; X86-NEXT: shll $22, %ecx -; X86-NEXT: andl $3, %esi -; X86-NEXT: shll $24, %esi -; X86-NEXT: orl %ecx, %esi +; X86-NEXT: shll $20, %ecx +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: andl $3, %edi +; X86-NEXT: shll $22, %edi +; X86-NEXT: orl %ecx, %edi +; X86-NEXT: andl $3, %edx +; X86-NEXT: shll $24, %edx +; X86-NEXT: orl %edi, %edx ; X86-NEXT: andl $3, %ebx ; X86-NEXT: shll $26, %ebx -; X86-NEXT: orl %esi, %ebx +; X86-NEXT: orl %edx, %ebx +; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X86-NEXT: andl $3, %eax +; X86-NEXT: shll $28, %eax +; X86-NEXT: orl %ebx, %eax ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X86-NEXT: andl $3, %ecx -; X86-NEXT: shll $28, %ecx -; X86-NEXT: orl %ebx, %ecx -; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload -; X86-NEXT: shll $30, %edx -; X86-NEXT: orl %ecx, %edx -; X86-NEXT: orl %eax, %edx -; X86-NEXT: movl %edx, (%edi) -; X86-NEXT: movl %edi, %eax +; X86-NEXT: shll $30, %ecx +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: movl %ecx, (%esi) +; X86-NEXT: movl %esi, %eax ; X86-NEXT: addl $132, %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi diff --git 
a/llvm/test/CodeGen/X86/umul-with-overflow.ll b/llvm/test/CodeGen/X86/umul-with-overflow.ll index ccabb360a990c..b9213d81e33f4 100644 --- a/llvm/test/CodeGen/X86/umul-with-overflow.ll +++ b/llvm/test/CodeGen/X86/umul-with-overflow.ll @@ -521,15 +521,14 @@ define i300 @test4(i300 %a, i300 %b) nounwind { ; X64-NEXT: pushq %rbx ; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %r8, %r11 -; X64-NEXT: movq %rcx, %r8 -; X64-NEXT: movq %rdx, %rcx +; X64-NEXT: movq %rdx, %r8 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r12 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r9 ; X64-NEXT: movq %rsi, %rax ; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %rbx ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rcx, %rax +; X64-NEXT: movq %r8, %rax ; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %r14 ; X64-NEXT: movq %rax, %r15 @@ -543,13 +542,13 @@ define i300 @test4(i300 %a, i300 %b) nounwind { ; X64-NEXT: adcq %r14, %rbp ; X64-NEXT: setb %al ; X64-NEXT: movzbl %al, %r10d -; X64-NEXT: movq %rcx, %rax +; X64-NEXT: movq %r8, %rax ; X64-NEXT: mulq %r12 ; X64-NEXT: movq %rdx, %r12 ; X64-NEXT: movq %rax, %r13 ; X64-NEXT: addq %rbp, %r13 ; X64-NEXT: adcq %r10, %r12 -; X64-NEXT: movq %r8, %rax +; X64-NEXT: movq %rcx, %rax ; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %r15 ; X64-NEXT: movq %rax, %r14 @@ -559,7 +558,7 @@ define i300 @test4(i300 %a, i300 %b) nounwind { ; X64-NEXT: movq %rax, %r10 ; X64-NEXT: addq %r15, %r10 ; X64-NEXT: adcq $0, %rbp -; X64-NEXT: movq %r8, %rax +; X64-NEXT: movq %rcx, %rax ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r9 ; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rax, %r15 @@ -574,7 +573,7 @@ define i300 @test4(i300 %a, i300 %b) nounwind { ; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %r10 ; X64-NEXT: movq %rax, %r12 -; X64-NEXT: movq %rcx, %rax +; X64-NEXT: movq %r8, %rax ; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %r13 ; X64-NEXT: movq %rax, %rbp @@ -585,17 +584,17 @@ define i300 @test4(i300 %a, i300 %b) nounwind { ; X64-NEXT: mulq %r10 ; X64-NEXT: addq %rbp, %rax ; X64-NEXT: adcq %r13, %rdx -; X64-NEXT: imulq %r10, %rcx -; X64-NEXT: addq %rdx, %rcx +; X64-NEXT: imulq %r10, %r8 +; X64-NEXT: addq %rdx, %r8 ; X64-NEXT: addq %r14, %r12 ; X64-NEXT: adcq %r15, %rax -; X64-NEXT: adcq %r11, %rcx -; X64-NEXT: imulq %r9, %r8 +; X64-NEXT: adcq %r11, %r8 +; X64-NEXT: imulq %r9, %rcx +; X64-NEXT: addq %r8, %rcx ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload ; X64-NEXT: imulq {{[0-9]+}}(%rsp), %rdx ; X64-NEXT: imulq {{[0-9]+}}(%rsp), %rsi ; X64-NEXT: addq %rdx, %rsi -; X64-NEXT: addq %r8, %rsi ; X64-NEXT: addq %rcx, %rsi ; X64-NEXT: movq %rbx, 8(%rdi) ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload diff --git a/llvm/test/CodeGen/X86/umul_fix.ll b/llvm/test/CodeGen/X86/umul_fix.ll index eacc714b49a4d..9dcf7081ac178 100644 --- a/llvm/test/CodeGen/X86/umul_fix.ll +++ b/llvm/test/CodeGen/X86/umul_fix.ll @@ -185,9 +185,9 @@ define i64 @func5(i64 %x, i64 %y) nounwind { ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: mull %esi ; X86-NEXT: imull {{[0-9]+}}(%esp), %ecx -; X86-NEXT: addl %ecx, %edx ; X86-NEXT: imull {{[0-9]+}}(%esp), %esi ; X86-NEXT: addl %esi, %edx +; X86-NEXT: addl %ecx, %edx ; X86-NEXT: popl %esi ; X86-NEXT: retl %tmp = call i64 @llvm.umul.fix.i64(i64 %x, i64 %y, i32 0) diff --git a/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll index 82603b35ba712..16be653dfd50f 100644 --- a/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll +++ 
b/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll @@ -108,46 +108,43 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { ; X86-NEXT: adcl %ecx, %edx ; X86-NEXT: addl %ebp, %eax ; X86-NEXT: adcl %ebx, %edx -; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X86-NEXT: setb %bl ; X86-NEXT: testl %esi, %esi ; X86-NEXT: setne %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: testl %esi, %esi -; X86-NEXT: setne %ch -; X86-NEXT: andb %cl, %ch -; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload -; X86-NEXT: orb %ch, %cl -; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload -; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X86-NEXT: setne %bh +; X86-NEXT: andb %cl, %bh +; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %bh # 1-byte Folded Reload +; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %bh # 1-byte Folded Reload +; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %bh # 1-byte Folded Reload ; X86-NEXT: testl %edi, %edi -; X86-NEXT: setne %cl +; X86-NEXT: setne %ch ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: testl %edi, %edi -; X86-NEXT: setne %ch -; X86-NEXT: andb %cl, %ch -; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload -; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %bl # 1-byte Folded Reload -; X86-NEXT: orb %ch, %bl -; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %bl # 1-byte Folded Reload +; X86-NEXT: setne %cl +; X86-NEXT: andb %ch, %cl +; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload +; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload +; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload ; X86-NEXT: orl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: setne %bh +; X86-NEXT: setne %ch ; X86-NEXT: orl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: movl %esi, 4(%ecx) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: movl %esi, (%ecx) -; X86-NEXT: movl %eax, 8(%ecx) -; X86-NEXT: movl %edx, 12(%ecx) +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: movl %edi, 4(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: movl %edi, (%esi) +; X86-NEXT: movl %eax, 8(%esi) +; X86-NEXT: movl %edx, 12(%esi) ; X86-NEXT: setne %al -; X86-NEXT: andb %bh, %al +; X86-NEXT: andb %ch, %al +; X86-NEXT: orb %cl, %al +; X86-NEXT: orb %bh, %al ; X86-NEXT: orb %bl, %al -; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload -; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload ; X86-NEXT: andb $1, %al -; X86-NEXT: movb %al, 16(%ecx) -; X86-NEXT: movl %ecx, %eax +; X86-NEXT: movb %al, 16(%esi) +; X86-NEXT: movl %esi, %eax ; X86-NEXT: addl $24, %esp ; X86-NEXT: .cfi_def_cfa_offset 20 ; X86-NEXT: popl %esi diff --git a/llvm/test/CodeGen/X86/v8i1-masks.ll b/llvm/test/CodeGen/X86/v8i1-masks.ll index 67b7eb48e4cb3..6992b175cc79d 100644 --- a/llvm/test/CodeGen/X86/v8i1-masks.ll +++ b/llvm/test/CodeGen/X86/v8i1-masks.ll @@ -301,9 +301,9 @@ define <8 x i32> @three_ands(<8 x float> %x) { ; X86-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] ; X86-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 ; X86-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2 -; X86-NEXT: vxorps 
%xmm3, %xmm3, %xmm3 -; X86-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0 -; X86-NEXT: vandps %ymm0, %ymm2, %ymm0 +; X86-NEXT: vandps %ymm2, %ymm1, %ymm1 +; X86-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; X86-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0 ; X86-NEXT: vandps %ymm0, %ymm1, %ymm0 ; X86-NEXT: retl ; @@ -312,9 +312,9 @@ define <8 x i32> @three_ands(<8 x float> %x) { ; X64-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] ; X64-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 ; X64-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2 -; X64-NEXT: vxorps %xmm3, %xmm3, %xmm3 -; X64-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0 -; X64-NEXT: vandps %ymm0, %ymm2, %ymm0 +; X64-NEXT: vandps %ymm2, %ymm1, %ymm1 +; X64-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; X64-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0 ; X64-NEXT: vandps %ymm0, %ymm1, %ymm0 ; X64-NEXT: retq ; @@ -377,11 +377,11 @@ define <8 x i32> @four_ands(<8 x float> %x) { ; X86-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] ; X86-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 ; X86-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2 -; X86-NEXT: vxorps %xmm3, %xmm3, %xmm3 -; X86-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 -; X86-NEXT: vandps %ymm3, %ymm2, %ymm2 ; X86-NEXT: vandps %ymm2, %ymm1, %ymm1 +; X86-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; X86-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 ; X86-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 +; X86-NEXT: vandps %ymm0, %ymm2, %ymm0 ; X86-NEXT: vandps %ymm0, %ymm1, %ymm0 ; X86-NEXT: retl ; @@ -390,11 +390,11 @@ define <8 x i32> @four_ands(<8 x float> %x) { ; X64-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] ; X64-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 ; X64-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2 -; X64-NEXT: vxorps %xmm3, %xmm3, %xmm3 -; X64-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 -; X64-NEXT: vandps %ymm3, %ymm2, %ymm2 ; X64-NEXT: vandps %ymm2, %ymm1, %ymm1 +; X64-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; X64-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 ; X64-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; X64-NEXT: vandps %ymm0, %ymm2, %ymm0 ; X64-NEXT: vandps %ymm0, %ymm1, %ymm0 ; X64-NEXT: retq ; @@ -407,9 +407,9 @@ define <8 x i32> @four_ands(<8 x float> %x) { ; X86-AVX2-NEXT: vandps %ymm2, %ymm1, %ymm1 ; X86-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; X86-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 -; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] -; X86-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0 -; X86-AVX2-NEXT: vandps %ymm0, %ymm2, %ymm0 +; X86-AVX2-NEXT: vandps %ymm2, %ymm1, %ymm1 +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] +; X86-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0 ; X86-AVX2-NEXT: vandps %ymm0, %ymm1, %ymm0 ; X86-AVX2-NEXT: retl ; @@ -422,9 +422,9 @@ define <8 x i32> @four_ands(<8 x float> %x) { ; X64-AVX2-NEXT: vandps %ymm2, %ymm1, %ymm1 ; X64-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; X64-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 -; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] -; X64-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0 -; X64-AVX2-NEXT: vandps %ymm0, %ymm2, %ymm0 +; X64-AVX2-NEXT: vandps %ymm2, %ymm1, %ymm1 +; X64-AVX2-NEXT: vbroadcastss 
{{.*#+}} ymm2 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] +; X64-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0 ; X64-AVX2-NEXT: vandps %ymm0, %ymm1, %ymm0 ; X64-AVX2-NEXT: retq ; @@ -467,13 +467,13 @@ define <8 x i32> @five_ands(<8 x float> %x) { ; X86-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] ; X86-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 ; X86-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2 -; X86-NEXT: vxorps %xmm3, %xmm3, %xmm3 -; X86-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 +; X86-NEXT: vandps %ymm2, %ymm1, %ymm1 +; X86-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; X86-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 +; X86-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm3 ; X86-NEXT: vandps %ymm3, %ymm2, %ymm2 ; X86-NEXT: vandps %ymm2, %ymm1, %ymm1 -; X86-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2 ; X86-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 -; X86-NEXT: vandps %ymm0, %ymm2, %ymm0 ; X86-NEXT: vandps %ymm0, %ymm1, %ymm0 ; X86-NEXT: retl ; @@ -482,13 +482,13 @@ define <8 x i32> @five_ands(<8 x float> %x) { ; X64-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] ; X64-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 ; X64-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2 -; X64-NEXT: vxorps %xmm3, %xmm3, %xmm3 -; X64-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 +; X64-NEXT: vandps %ymm2, %ymm1, %ymm1 +; X64-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; X64-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 +; X64-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm3 ; X64-NEXT: vandps %ymm3, %ymm2, %ymm2 ; X64-NEXT: vandps %ymm2, %ymm1, %ymm1 -; X64-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2 ; X64-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; X64-NEXT: vandps %ymm0, %ymm2, %ymm0 ; X64-NEXT: vandps %ymm0, %ymm1, %ymm0 ; X64-NEXT: retq ; @@ -501,12 +501,12 @@ define <8 x i32> @five_ands(<8 x float> %x) { ; X86-AVX2-NEXT: vandps %ymm2, %ymm1, %ymm1 ; X86-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; X86-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 -; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] -; X86-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 -; X86-AVX2-NEXT: vandps %ymm3, %ymm2, %ymm2 ; X86-AVX2-NEXT: vandps %ymm2, %ymm1, %ymm1 -; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1] -; X86-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0 +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] +; X86-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1] +; X86-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0 +; X86-AVX2-NEXT: vandps %ymm0, %ymm2, %ymm0 ; X86-AVX2-NEXT: vandps %ymm0, %ymm1, %ymm0 ; X86-AVX2-NEXT: retl ; @@ -519,12 +519,12 @@ define <8 x i32> @five_ands(<8 x float> %x) { ; X64-AVX2-NEXT: vandps %ymm2, %ymm1, %ymm1 ; X64-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; X64-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 -; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = 
[1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] -; X64-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 -; X64-AVX2-NEXT: vandps %ymm3, %ymm2, %ymm2 ; X64-AVX2-NEXT: vandps %ymm2, %ymm1, %ymm1 -; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1] -; X64-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0 +; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] +; X64-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 +; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1] +; X64-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0 +; X64-AVX2-NEXT: vandps %ymm0, %ymm2, %ymm0 ; X64-AVX2-NEXT: vandps %ymm0, %ymm1, %ymm0 ; X64-AVX2-NEXT: retq ; @@ -631,9 +631,9 @@ define <8 x i32> @three_or(<8 x float> %x) { ; X86-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] ; X86-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 ; X86-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2 -; X86-NEXT: vxorps %xmm3, %xmm3, %xmm3 -; X86-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0 -; X86-NEXT: vorps %ymm0, %ymm2, %ymm0 +; X86-NEXT: vorps %ymm2, %ymm1, %ymm1 +; X86-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; X86-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0 ; X86-NEXT: vorps %ymm0, %ymm1, %ymm0 ; X86-NEXT: retl ; @@ -642,9 +642,9 @@ define <8 x i32> @three_or(<8 x float> %x) { ; X64-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] ; X64-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 ; X64-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2 -; X64-NEXT: vxorps %xmm3, %xmm3, %xmm3 -; X64-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0 -; X64-NEXT: vorps %ymm0, %ymm2, %ymm0 +; X64-NEXT: vorps %ymm2, %ymm1, %ymm1 +; X64-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; X64-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0 ; X64-NEXT: vorps %ymm0, %ymm1, %ymm0 ; X64-NEXT: retq ; @@ -711,11 +711,11 @@ define <8 x i32> @four_or(<8 x float> %x) { ; X86-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] ; X86-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 ; X86-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2 -; X86-NEXT: vxorps %xmm3, %xmm3, %xmm3 -; X86-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 -; X86-NEXT: vorps %ymm3, %ymm2, %ymm2 ; X86-NEXT: vorps %ymm2, %ymm1, %ymm1 +; X86-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; X86-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 ; X86-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 +; X86-NEXT: vorps %ymm0, %ymm2, %ymm0 ; X86-NEXT: vorps %ymm0, %ymm1, %ymm0 ; X86-NEXT: retl ; @@ -724,11 +724,11 @@ define <8 x i32> @four_or(<8 x float> %x) { ; X64-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] ; X64-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 ; X64-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2 -; X64-NEXT: vxorps %xmm3, %xmm3, %xmm3 -; X64-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 -; X64-NEXT: vorps %ymm3, %ymm2, %ymm2 ; X64-NEXT: vorps %ymm2, %ymm1, %ymm1 +; X64-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; X64-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 ; X64-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; X64-NEXT: vorps %ymm0, %ymm2, %ymm0 ; X64-NEXT: vorps %ymm0, %ymm1, %ymm0 ; X64-NEXT: retq ; @@ -741,9 +741,9 @@ define <8 x i32> @four_or(<8 
x float> %x) {
 ; X86-AVX2-NEXT: vorps %ymm2, %ymm1, %ymm1
 ; X86-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2
 ; X86-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2
-; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
-; X86-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0
-; X86-AVX2-NEXT: vorps %ymm0, %ymm2, %ymm0
+; X86-AVX2-NEXT: vorps %ymm2, %ymm1, %ymm1
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
+; X86-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0
 ; X86-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0
 ; X86-AVX2-NEXT: retl
 ;
@@ -756,9 +756,9 @@ define <8 x i32> @four_or(<8 x float> %x) {
 ; X64-AVX2-NEXT: vorps %ymm2, %ymm1, %ymm1
 ; X64-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2
 ; X64-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2
-; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
-; X64-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0
-; X64-AVX2-NEXT: vorps %ymm0, %ymm2, %ymm0
+; X64-AVX2-NEXT: vorps %ymm2, %ymm1, %ymm1
+; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
+; X64-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0
 ; X64-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0
 ; X64-AVX2-NEXT: retq
 ;
@@ -807,13 +807,13 @@ define <8 x i32> @five_or(<8 x float> %x) {
 ; X86-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
 ; X86-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
 ; X86-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
-; X86-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; X86-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
+; X86-NEXT: vorps %ymm2, %ymm1, %ymm1
+; X86-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; X86-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2
+; X86-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm3
 ; X86-NEXT: vorps %ymm3, %ymm2, %ymm2
 ; X86-NEXT: vorps %ymm2, %ymm1, %ymm1
-; X86-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
 ; X86-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
-; X86-NEXT: vorps %ymm0, %ymm2, %ymm0
 ; X86-NEXT: vorps %ymm0, %ymm1, %ymm0
 ; X86-NEXT: retl
 ;
@@ -822,13 +822,13 @@ define <8 x i32> @five_or(<8 x float> %x) {
 ; X64-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
 ; X64-NEXT: vcmpleps %ymm0, %ymm1, %ymm1
 ; X64-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2
-; X64-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; X64-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
+; X64-NEXT: vorps %ymm2, %ymm1, %ymm1
+; X64-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; X64-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2
+; X64-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm3
 ; X64-NEXT: vorps %ymm3, %ymm2, %ymm2
 ; X64-NEXT: vorps %ymm2, %ymm1, %ymm1
-; X64-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2
 ; X64-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; X64-NEXT: vorps %ymm0, %ymm2, %ymm0
 ; X64-NEXT: vorps %ymm0, %ymm1, %ymm0
 ; X64-NEXT: retq
 ;
@@ -841,12 +841,12 @@ define <8 x i32> @five_or(<8 x float> %x) {
 ; X86-AVX2-NEXT: vorps %ymm2, %ymm1, %ymm1
 ; X86-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2
 ; X86-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2
-; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
-; X86-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
-; X86-AVX2-NEXT: vorps %ymm3, %ymm2, %ymm2
 ; X86-AVX2-NEXT: vorps %ymm2, %ymm1, %ymm1
-; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1]
-; X86-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
+; X86-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1]
+; X86-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0
+; X86-AVX2-NEXT: vorps %ymm0, %ymm2, %ymm0
 ; X86-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0
 ; X86-AVX2-NEXT: retl
 ;
@@ -859,12 +859,12 @@ define <8 x i32> @five_or(<8 x float> %x) {
 ; X64-AVX2-NEXT: vorps %ymm2, %ymm1, %ymm1
 ; X64-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2
 ; X64-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2
-; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
-; X64-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
-; X64-AVX2-NEXT: vorps %ymm3, %ymm2, %ymm2
 ; X64-AVX2-NEXT: vorps %ymm2, %ymm1, %ymm1
-; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1]
-; X64-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0
+; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
+; X64-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2
+; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1]
+; X64-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0
+; X64-AVX2-NEXT: vorps %ymm0, %ymm2, %ymm0
 ; X64-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0
 ; X64-AVX2-NEXT: retq
 ;
diff --git a/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-5.ll b/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-5.ll
index 06d390f053c7e..e8686b5a1467c 100644
--- a/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-5.ll
+++ b/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-5.ll
@@ -1167,14 +1167,14 @@ define void @store_i8_stride5_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX-NEXT: vpshufb {{.*#+}} xmm8 = zero,xmm8[6,7],zero,zero,zero,xmm8[8,9],zero,zero,zero,xmm8[10,11],zero,zero,zero
 ; AVX-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
 ; AVX-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[6],zero,zero,zero,xmm2[9,8],zero,zero,zero,xmm2[11,10],zero,zero,zero,xmm2[13,12]
-; AVX-NEXT: vpor %xmm2, %xmm8, %xmm2
 ; AVX-NEXT: vpshufb {{.*#+}} xmm3 = zero,zero,zero,xmm0[3],zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,xmm0[5],zero,zero
+; AVX-NEXT: vpor %xmm3, %xmm8, %xmm3
 ; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
 ; AVX-NEXT: vpshufb {{.*#+}} xmm3 = zero,zero,xmm7[4,5],zero,zero,zero,xmm7[6,7],zero,zero,zero,xmm7[8,9],zero,zero
 ; AVX-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm4[8],xmm1[8],xmm4[9],xmm1[9],xmm4[10],xmm1[10],xmm4[11],xmm1[11],xmm4[12],xmm1[12],xmm4[13],xmm1[13],xmm4[14],xmm1[14],xmm4[15],xmm1[15]
 ; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,xmm1[5,4],zero,zero,zero,xmm1[7,6],zero,zero,zero,xmm1[9,8]
-; AVX-NEXT: vpor %xmm3, %xmm1, %xmm1
-; AVX-NEXT: vpshufb {{.*#+}} xmm3 = zero,xmm0[9],zero,zero,zero,zero,xmm0[10],zero,zero,zero,zero,xmm0[11],zero,zero,zero,zero
+; AVX-NEXT: vpshufb {{.*#+}} xmm4 = zero,xmm0[9],zero,zero,zero,zero,xmm0[10],zero,zero,zero,zero,xmm0[11],zero,zero,zero,zero
+; AVX-NEXT: vpor %xmm4, %xmm3, %xmm3
 ; AVX-NEXT: vpor %xmm3, %xmm1, %xmm1
 ; AVX-NEXT: vmovdqa %xmm1, 48(%r9)
 ; AVX-NEXT: vmovdqa %xmm2, 16(%r9)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll b/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
index 9cd0f4d12e15a..8b64606f9fbc4 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
@@ -1068,11 +1068,11 @@ define i32 @mask_v3i1(<3 x i32> %a, <3 x i32> %b) {
 ; SSE2-NEXT: movd %xmm1, %eax
 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
 ; SSE2-NEXT: movd %xmm0, %ecx
-; SSE2-NEXT: orl %eax, %ecx
 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
-; SSE2-NEXT: movd %xmm0, %eax
-; SSE2-NEXT: orl %ecx, %eax
-; SSE2-NEXT: testb $1, %al
+; SSE2-NEXT: movd %xmm0, %edx
+; SSE2-NEXT: orl %eax, %edx
+; SSE2-NEXT: orl %ecx, %edx
+; SSE2-NEXT: testb $1, %dl
 ; SSE2-NEXT: je .LBB27_2
 ; SSE2-NEXT: # %bb.1:
 ; SSE2-NEXT: xorl %eax, %eax
diff --git a/llvm/test/CodeGen/X86/win-smallparams.ll b/llvm/test/CodeGen/X86/win-smallparams.ll
index 5ca8f6705479f..91bdaa757975d 100644
--- a/llvm/test/CodeGen/X86/win-smallparams.ll
+++ b/llvm/test/CodeGen/X86/win-smallparams.ll
@@ -79,16 +79,16 @@ define i32 @manyargs(i8 %a, i16 %b, i8 %c, i16 %d, i8 %e, i16 %f) {
 ; WIN32-MSVC-LABEL: manyargs:
 ; WIN32-MSVC: # %bb.0: # %entry
 ; WIN32-MSVC-NEXT: pushl %esi
-; WIN32-MSVC-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; WIN32-MSVC-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; WIN32-MSVC-NEXT: addl %eax, %ecx
-; WIN32-MSVC-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; WIN32-MSVC-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; WIN32-MSVC-NEXT: addl %eax, %edx
-; WIN32-MSVC-NEXT: movswl {{[0-9]+}}(%esp), %esi
+; WIN32-MSVC-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; WIN32-MSVC-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; WIN32-MSVC-NEXT: movzwl {{[0-9]+}}(%esp), %edx
+; WIN32-MSVC-NEXT: movzbl {{[0-9]+}}(%esp), %esi
+; WIN32-MSVC-NEXT: addl %edx, %esi
+; WIN32-MSVC-NEXT: addl %eax, %esi
+; WIN32-MSVC-NEXT: movswl {{[0-9]+}}(%esp), %edx
 ; WIN32-MSVC-NEXT: movsbl {{[0-9]+}}(%esp), %eax
-; WIN32-MSVC-NEXT: addl %esi, %eax
 ; WIN32-MSVC-NEXT: addl %edx, %eax
+; WIN32-MSVC-NEXT: addl %esi, %eax
 ; WIN32-MSVC-NEXT: addl %ecx, %eax
 ; WIN32-MSVC-NEXT: popl %esi
 ; WIN32-MSVC-NEXT: retl
@@ -98,16 +98,16 @@ define i32 @manyargs(i8 %a, i16 %b, i8 %c, i16 %d, i8 %e, i16 %f) {
 ; WIN32-GNU-NEXT: pushl %esi
 ; WIN32-GNU-NEXT: .cfi_def_cfa_offset 8
 ; WIN32-GNU-NEXT: .cfi_offset %esi, -8
-; WIN32-GNU-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; WIN32-GNU-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; WIN32-GNU-NEXT: addl %eax, %ecx
-; WIN32-GNU-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; WIN32-GNU-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; WIN32-GNU-NEXT: addl %eax, %edx
-; WIN32-GNU-NEXT: movswl {{[0-9]+}}(%esp), %esi
+; WIN32-GNU-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; WIN32-GNU-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; WIN32-GNU-NEXT: movzwl {{[0-9]+}}(%esp), %edx
+; WIN32-GNU-NEXT: movzbl {{[0-9]+}}(%esp), %esi
+; WIN32-GNU-NEXT: addl %edx, %esi
+; WIN32-GNU-NEXT: addl %eax, %esi
+; WIN32-GNU-NEXT: movswl {{[0-9]+}}(%esp), %edx
 ; WIN32-GNU-NEXT: movsbl {{[0-9]+}}(%esp), %eax
-; WIN32-GNU-NEXT: addl %esi, %eax
 ; WIN32-GNU-NEXT: addl %edx, %eax
+; WIN32-GNU-NEXT: addl %esi, %eax
 ; WIN32-GNU-NEXT: addl %ecx, %eax
 ; WIN32-GNU-NEXT: popl %esi
 ; WIN32-GNU-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/x86-interleaved-access.ll b/llvm/test/CodeGen/X86/x86-interleaved-access.ll
index 49947eddc61b9..8e3b74eda091a 100644
--- a/llvm/test/CodeGen/X86/x86-interleaved-access.ll
+++ b/llvm/test/CodeGen/X86/x86-interleaved-access.ll
@@ -130,12 +130,12 @@ define <4 x i64> @load_factori64_4(ptr %ptr) nounwind {
 ; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm1
 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
 ; AVX1-NEXT: vpaddq %xmm1, %xmm5, %xmm1
+; AVX1-NEXT: vpaddq %xmm3, %xmm4, %xmm4
 ; AVX1-NEXT: vpaddq %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
-; AVX1-NEXT: vpaddq %xmm5, %xmm4, %xmm4
-; AVX1-NEXT: vpaddq %xmm4, %xmm1, %xmm1
-; AVX1-NEXT: vpaddq %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
+; AVX1-NEXT: vpaddq %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpaddq %xmm3, %xmm1, %xmm1
 ; AVX1-NEXT: vpaddq %xmm0, %xmm2, %xmm0
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
@@ -816,10 +816,10 @@ define <32 x i8> @interleaved_load_vf32_i8_stride3(ptr %ptr){
 ; AVX1-NEXT: vpaddb %xmm3, %xmm4, %xmm3
 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[11,12,13,14,15],xmm7[0,1,2,3,4,5,6,7,8,9,10]
 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
+; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm1
 ; AVX1-NEXT: vpaddb %xmm1, %xmm3, %xmm1
-; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
@@ -843,10 +843,10 @@ define <32 x i8> @interleaved_load_vf32_i8_stride3(ptr %ptr){
 ; AVX2OR512-NEXT: vbroadcasti128 {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0]
 ; AVX2OR512-NEXT: # ymm4 = mem[0,1,0,1]
 ; AVX2OR512-NEXT: vpblendvb %ymm4, %ymm0, %ymm1, %ymm1
-; AVX2OR512-NEXT: vpaddb %ymm1, %ymm2, %ymm1
 ; AVX2OR512-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[11,12,13,14,15],ymm3[0,1,2,3,4,5,6,7,8,9,10],ymm0[27,28,29,30,31],ymm3[16,17,18,19,20,21,22,23,24,25,26]
 ; AVX2OR512-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,26,27,28,29,30,31,16,17,18,19,20,21,22,23,24,25]
-; AVX2OR512-NEXT: vpaddb %ymm0, %ymm1, %ymm0
+; AVX2OR512-NEXT: vpaddb %ymm1, %ymm0, %ymm0
+; AVX2OR512-NEXT: vpaddb %ymm0, %ymm2, %ymm0
 ; AVX2OR512-NEXT: retq
 %wide.vec = load <96 x i8>, ptr %ptr
 %v1 = shufflevector <96 x i8> %wide.vec, <96 x i8> undef,<32 x i32>
@@ -873,10 +873,10 @@ define <16 x i8> @interleaved_load_vf16_i8_stride3(ptr %ptr){
 ; AVX-NEXT: vpalignr {{.*#+}} xmm2 = xmm3[11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10]
 ; AVX-NEXT: vpmovsxdq {{.*#+}} xmm4 = [18446744073709551615,16777215]
 ; AVX-NEXT: vpblendvb %xmm4, %xmm0, %xmm1, %xmm1
-; AVX-NEXT: vpaddb %xmm1, %xmm2, %xmm1
 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9,10]
 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
-; AVX-NEXT: vpaddb %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpaddb %xmm0, %xmm2, %xmm0
 ; AVX-NEXT: retq
 %wide.vec = load <48 x i8>, ptr %ptr
 %v1 = shufflevector <48 x i8> %wide.vec, <48 x i8> undef,<16 x i32>
@@ -1425,7 +1425,6 @@ define <64 x i8> @interleaved_load_vf64_i8_stride3(ptr %ptr){
 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm11 = [128,128,128,128,128,128,128,128,128,128,128,2,5,8,11,14,128,128,128,128,128,128,128,128,128,128,128,18,21,24,27,30]
 ; AVX2-NEXT: vpshufb %ymm11, %ymm2, %ymm2
 ; AVX2-NEXT: vpor %ymm2, %ymm6, %ymm2
-; AVX2-NEXT: vpaddb %ymm2, %ymm9, %ymm2
 ; AVX2-NEXT: vpshufb %ymm10, %ymm3, %ymm3
 ; AVX2-NEXT: vpshufb %ymm11, %ymm4, %ymm4
 ; AVX2-NEXT: vpor %ymm4, %ymm3, %ymm3
@@ -1436,7 +1435,8 @@ define <64 x i8> @interleaved_load_vf64_i8_stride3(ptr %ptr){
 ; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm7 = [255,255,255,255,255,255,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,255,255,255,255,255]
 ; AVX2-NEXT: # ymm7 = mem[0,1,0,1]
 ; AVX2-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX2-NEXT: vpaddb %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpaddb %ymm0, %ymm9, %ymm0
 ; AVX2-NEXT: vpalignr {{.*#+}} ymm2 = ymm5[5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,21,22,23,24,25,26,27,28,29,30,31,16,17,18,19,20]
 ; AVX2-NEXT: vpshufb %ymm6, %ymm1, %ymm1
 ; AVX2-NEXT: vpblendvb %ymm7, %ymm2, %ymm1, %ymm1
diff --git a/llvm/test/CodeGen/X86/xmulo.ll b/llvm/test/CodeGen/X86/xmulo.ll
index 2169b39b9dfa0..4b5d65d1f7957 100644
--- a/llvm/test/CodeGen/X86/xmulo.ll
+++ b/llvm/test/CodeGen/X86/xmulo.ll
@@ -1679,23 +1679,25 @@ define zeroext i1 @smuloi64_load(ptr %ptr1, i64 %v2, ptr %res) {
 ; WIN32-NEXT: pushl %ebx
 ; WIN32-NEXT: pushl %edi
 ; WIN32-NEXT: pushl %esi
-; WIN32-NEXT: subl $20, %esp
+; WIN32-NEXT: subl $16, %esp
 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; WIN32-NEXT: movl (%eax), %ebx
 ; WIN32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; WIN32-NEXT: movl 4(%eax), %ebp
+; WIN32-NEXT: movl 4(%eax), %edx
 ; WIN32-NEXT: movl %ecx, %eax
 ; WIN32-NEXT: movl %ecx, %edi
 ; WIN32-NEXT: sarl $31, %eax
 ; WIN32-NEXT: movl %eax, %ecx
-; WIN32-NEXT: imull %ebp, %ecx
+; WIN32-NEXT: imull %edx, %ecx
+; WIN32-NEXT: movl %edx, %esi
+; WIN32-NEXT: movl %edx, (%esp) # 4-byte Spill
 ; WIN32-NEXT: mull %ebx
-; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill
 ; WIN32-NEXT: movl %edx, %ebx
+; WIN32-NEXT: movl %eax, %ebp
+; WIN32-NEXT: addl %eax, %ebx
 ; WIN32-NEXT: addl %ecx, %ebx
-; WIN32-NEXT: movl %ebp, %ecx
-; WIN32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; WIN32-NEXT: movl %esi, %ecx
 ; WIN32-NEXT: sarl $31, %ecx
 ; WIN32-NEXT: movl %edi, %esi
 ; WIN32-NEXT: imull %ecx, %esi
@@ -1704,36 +1706,34 @@ define zeroext i1 @smuloi64_load(ptr %ptr1, i64 %v2, ptr %res) {
 ; WIN32-NEXT: movl %edx, %edi
 ; WIN32-NEXT: addl %eax, %edi
 ; WIN32-NEXT: addl %esi, %edi
-; WIN32-NEXT: movl (%esp), %ecx # 4-byte Reload
-; WIN32-NEXT: addl %ecx, %ebx
-; WIN32-NEXT: addl %eax, %ecx
-; WIN32-NEXT: movl %ecx, (%esp) # 4-byte Spill
+; WIN32-NEXT: addl %eax, %ebp
+; WIN32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; WIN32-NEXT: adcl %ebx, %edi
-; WIN32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; WIN32-NEXT: movl %ecx, %eax
+; WIN32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; WIN32-NEXT: movl %ebp, %eax
 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
 ; WIN32-NEXT: mull %esi
-; WIN32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; WIN32-NEXT: movl %edx, %ecx
 ; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; WIN32-NEXT: movl %ebp, %eax
+; WIN32-NEXT: movl (%esp), %eax # 4-byte Reload
 ; WIN32-NEXT: mull %esi
 ; WIN32-NEXT: movl %edx, %ebx
 ; WIN32-NEXT: movl %eax, %esi
-; WIN32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; WIN32-NEXT: addl %ecx, %esi
 ; WIN32-NEXT: adcl $0, %ebx
-; WIN32-NEXT: movl %ecx, %eax
+; WIN32-NEXT: movl %ebp, %eax
 ; WIN32-NEXT: mull {{[0-9]+}}(%esp)
 ; WIN32-NEXT: movl %edx, %ecx
 ; WIN32-NEXT: movl %eax, %ebp
 ; WIN32-NEXT: addl %esi, %ebp
 ; WIN32-NEXT: adcl %ebx, %ecx
 ; WIN32-NEXT: setb %bl
-; WIN32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; WIN32-NEXT: movl (%esp), %eax # 4-byte Reload
 ; WIN32-NEXT: mull {{[0-9]+}}(%esp)
 ; WIN32-NEXT: addl %ecx, %eax
 ; WIN32-NEXT: movzbl %bl, %ecx
 ; WIN32-NEXT: adcl %ecx, %edx
-; WIN32-NEXT: addl (%esp), %eax # 4-byte Folded Reload
+; WIN32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
 ; WIN32-NEXT: adcl %edi, %edx
 ; WIN32-NEXT: movl %ebp, %ecx
 ; WIN32-NEXT: sarl $31, %ecx
@@ -1745,7 +1745,7 @@ define zeroext i1 @smuloi64_load(ptr %ptr1, i64 %v2, ptr %res) {
 ; WIN32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; WIN32-NEXT: movl %ecx, (%eax)
 ; WIN32-NEXT: setne %al
-; WIN32-NEXT: addl $20, %esp
+; WIN32-NEXT: addl $16, %esp
 ; WIN32-NEXT: popl %esi
 ; WIN32-NEXT: popl %edi
 ; WIN32-NEXT: popl %ebx