-
Notifications
You must be signed in to change notification settings - Fork 15k
[PPC]Optimize zeroing accumulator and spilling instructions into simple instructions #96094
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -109,6 +109,93 @@ static bool hasPCRelativeForm(MachineInstr &Use) { | |
| MachineFunctionProperties::Property::NoVRegs); | ||
| } | ||
|
|
||
| // The funtion will simply the zeroing accumulator and spilling instrcutions | ||
| // into simple xxlxor and spilling instrcuctions. | ||
| // From: | ||
| // setaccz acci | ||
| // xxmfacc acci | ||
| // stxv vsr(i*4+0), D(1) | ||
| // stxv vsr(i*4+1), D-16(1) | ||
| // stxv vsr(i*4+2), D-32(1) | ||
| // stxv vsr(i*4+3), D-48(1) | ||
|
|
||
| // To: | ||
| // xxlxor vsr(i*4), 0, 0 | ||
| // stxv vsr(i*4), D(1) | ||
| // stxv vsr(i*4), D-16(1) | ||
| // stxv vsr(i*4), D-32(1) | ||
| // stxv vsr(i*4), D-48(1) | ||
| bool | ||
| OptimizeZeroingAccumulatorSpilling(MachineBasicBlock &MBB, | ||
| const TargetRegisterInfo *TRI) const { | ||
| bool changed = false; | ||
|
||
| for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) { | ||
| if (BBI->getOpcode() != PPC::XXSETACCZ) | ||
| continue; | ||
|
|
||
| Register ACCZReg = BBI->getOperand(0).getReg(); | ||
|
|
||
| DenseSet<MachineInstr *> InstrsToErase; | ||
| InstrsToErase.insert(&*BBI++); | ||
|
|
||
|
||
| if (BBI->getOpcode() != PPC::XXMFACC) { | ||
| --BBI; | ||
| continue; | ||
| } | ||
|
|
||
| Register ACCWReg = BBI->getOperand(0).getReg(); | ||
|
|
||
| if (ACCWReg != ACCZReg) | ||
| continue; | ||
|
|
||
| auto XXMFACCInstr = BBI; | ||
| InstrsToErase.insert(&*BBI++); | ||
|
|
||
| Register VSLRegBase = (ACCWReg - PPC::ACC0) * 4 + PPC::VSL0; | ||
| bool isVSLRegBaseKilled = false; | ||
| for (unsigned InstrCount = 0; InstrCount < 4; ++InstrCount, ++BBI) { | ||
| if (BBI->getOpcode() == PPC::STXV) { | ||
| Register Reg0 = BBI->getOperand(0).getReg(); | ||
| // If the VSLRegBase Register is killed, we put the kill in the | ||
| // last STXV instruction. | ||
|
||
| if (Reg0 == VSLRegBase && BBI->getOperand(0).isKill()) | ||
| isVSLRegBaseKilled = true; | ||
| if (Reg0 < VSLRegBase || Reg0 > VSLRegBase + 3) | ||
| continue; | ||
| } else { | ||
| --BBI; | ||
| continue; | ||
| } | ||
| } | ||
|
|
||
| BBI = XXMFACCInstr; | ||
| BBI++; | ||
| for (unsigned InstrCount = 0; InstrCount < 4; ++InstrCount, ++BBI) { | ||
| Register VSLiReg = BBI->getOperand(0).getReg(); | ||
| BBI->substituteRegister(VSLiReg, VSLRegBase, 0, *TRI); | ||
| BBI->getOperand(0).setIsKill(false); | ||
| } | ||
|
|
||
| if (isVSLRegBaseKilled) | ||
| (--BBI)->getOperand(0).setIsKill(true); | ||
|
|
||
| DebugLoc DL = XXMFACCInstr->getDebugLoc(); | ||
| const PPCInstrInfo *TII = XXMFACCInstr->getMF() | ||
| ->getSubtarget<PPCSubtarget>() | ||
| .getInstrInfo(); | ||
|
|
||
| BuildMI(MBB, &*XXMFACCInstr, DL, TII->get(PPC::XXLXOR), VSLRegBase) | ||
| .addReg(VSLRegBase,RegState::Undef) | ||
| .addReg(VSLRegBase,RegState::Undef); | ||
|
|
||
| for (MachineInstr *MI : InstrsToErase) | ||
| MI->eraseFromParent(); | ||
|
||
|
|
||
| changed |= true; | ||
| } | ||
| return changed; | ||
| } | ||
|
|
||
| // This function removes any redundant load immediates. It has two level | ||
| // loops - The outer loop finds the load immediates BBI that could be used | ||
| // to replace following redundancy. The inner loop scans instructions that | ||
|
|
@@ -466,6 +553,7 @@ static bool hasPCRelativeForm(MachineInstr &Use) { | |
| Changed |= removeRedundantLIs(MBB, TRI); | ||
| Changed |= addLinkerOpt(MBB, TRI); | ||
| Changed |= removeAccPrimeUnprime(MBB); | ||
| Changed |= OptimizeZeroingAccumulatorSpilling(MBB, TRI); | ||
| for (MachineInstr &MI : MBB) { | ||
| unsigned Opc = MI.getOpcode(); | ||
| if (Opc == PPC::UNENCODED_NOP) { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -115,22 +115,20 @@ declare <512 x i1> @llvm.ppc.mma.xxsetaccz() | |
| define void @int_xxsetaccz(ptr %ptr) { | ||
| ; CHECK-LABEL: int_xxsetaccz: | ||
| ; CHECK: # %bb.0: # %entry | ||
| ; CHECK-NEXT: xxsetaccz acc0 | ||
| ; CHECK-NEXT: xxmfacc acc0 | ||
| ; CHECK-NEXT: xxlxor vs0, vs0, vs0 | ||
| ; CHECK-NEXT: stxv vs0, 48(r3) | ||
| ; CHECK-NEXT: stxv vs1, 32(r3) | ||
| ; CHECK-NEXT: stxv vs2, 16(r3) | ||
| ; CHECK-NEXT: stxv vs3, 0(r3) | ||
| ; CHECK-NEXT: stxv vs0, 32(r3) | ||
| ; CHECK-NEXT: stxv vs0, 16(r3) | ||
| ; CHECK-NEXT: stxv vs0, 0(r3) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not related to this patch, the case at line 50 seems like an existing bug... There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sorry that I can not got the comment, can you explain more detail ? |
||
| ; CHECK-NEXT: blr | ||
| ; | ||
| ; CHECK-BE-LABEL: int_xxsetaccz: | ||
| ; CHECK-BE: # %bb.0: # %entry | ||
| ; CHECK-BE-NEXT: xxsetaccz acc0 | ||
| ; CHECK-BE-NEXT: xxmfacc acc0 | ||
| ; CHECK-BE-NEXT: stxv vs1, 16(r3) | ||
| ; CHECK-BE-NEXT: xxlxor vs0, vs0, vs0 | ||
| ; CHECK-BE-NEXT: stxv vs0, 16(r3) | ||
| ; CHECK-BE-NEXT: stxv vs0, 0(r3) | ||
| ; CHECK-BE-NEXT: stxv vs3, 48(r3) | ||
| ; CHECK-BE-NEXT: stxv vs2, 32(r3) | ||
| ; CHECK-BE-NEXT: stxv vs0, 48(r3) | ||
| ; CHECK-BE-NEXT: stxv vs0, 32(r3) | ||
| ; CHECK-BE-NEXT: blr | ||
| entry: | ||
| %0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz() | ||
|
|
@@ -143,22 +141,20 @@ declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble | |
| define void @disass_acc(ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr %ptr4) { | ||
| ; CHECK-LABEL: disass_acc: | ||
| ; CHECK: # %bb.0: # %entry | ||
| ; CHECK-NEXT: xxsetaccz acc0 | ||
| ; CHECK-NEXT: xxmfacc acc0 | ||
| ; CHECK-NEXT: stxv vs3, 0(r3) | ||
| ; CHECK-NEXT: stxv vs2, 0(r4) | ||
| ; CHECK-NEXT: stxv vs1, 0(r5) | ||
| ; CHECK-NEXT: xxlxor vs0, vs0, vs0 | ||
| ; CHECK-NEXT: stxv vs0, 0(r3) | ||
| ; CHECK-NEXT: stxv vs0, 0(r4) | ||
| ; CHECK-NEXT: stxv vs0, 0(r5) | ||
| ; CHECK-NEXT: stxv vs0, 0(r6) | ||
| ; CHECK-NEXT: blr | ||
| ; | ||
| ; CHECK-BE-LABEL: disass_acc: | ||
| ; CHECK-BE: # %bb.0: # %entry | ||
| ; CHECK-BE-NEXT: xxsetaccz acc0 | ||
| ; CHECK-BE-NEXT: xxmfacc acc0 | ||
| ; CHECK-BE-NEXT: xxlxor vs0, vs0, vs0 | ||
| ; CHECK-BE-NEXT: stxv vs0, 0(r3) | ||
| ; CHECK-BE-NEXT: stxv vs1, 0(r4) | ||
| ; CHECK-BE-NEXT: stxv vs2, 0(r5) | ||
| ; CHECK-BE-NEXT: stxv vs3, 0(r6) | ||
| ; CHECK-BE-NEXT: stxv vs0, 0(r4) | ||
| ; CHECK-BE-NEXT: stxv vs0, 0(r5) | ||
| ; CHECK-BE-NEXT: stxv vs0, 0(r6) | ||
| ; CHECK-BE-NEXT: blr | ||
| entry: | ||
| %0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz() | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What does
D(1)mean? 1 is for R1? could we use another name likeBase? And we don't check the offsets in the implementation, so maybe the D/D-16/D-32/D-48 is also not necessary?