20 | 20 | #include "Utils/AArch64BaseInfo.h"
21 | 21 | #include "llvm/ADT/ArrayRef.h"
22 | 22 | #include "llvm/ADT/STLExtras.h"
23 |    | -#include "llvm/ADT/SmallSet.h"
24 | 23 | #include "llvm/ADT/SmallVector.h"
25 | 24 | #include "llvm/CodeGen/CFIInstBuilder.h"
26 | 25 | #include "llvm/CodeGen/LivePhysRegs.h"

36 | 35 | #include "llvm/CodeGen/MachineRegisterInfo.h"
37 | 36 | #include "llvm/CodeGen/RegisterScavenging.h"
38 | 37 | #include "llvm/CodeGen/StackMaps.h"
39 |    | -#include "llvm/CodeGen/TargetOpcodes.h"
40 | 38 | #include "llvm/CodeGen/TargetRegisterInfo.h"
41 | 39 | #include "llvm/CodeGen/TargetSubtargetInfo.h"
42 | 40 | #include "llvm/IR/DebugInfoMetadata.h"
@@ -7354,9 +7352,6 @@ bool AArch64InstrInfo::isThroughputPattern(unsigned Pattern) const {
7354 | 7352 |   case AArch64MachineCombinerPattern::MULSUBv2i32_indexed_OP2:
7355 | 7353 |   case AArch64MachineCombinerPattern::MULSUBv4i32_indexed_OP1:
7356 | 7354 |   case AArch64MachineCombinerPattern::MULSUBv4i32_indexed_OP2:
7357 |      | -  case AArch64MachineCombinerPattern::GATHER_LANE_i32:
7358 |      | -  case AArch64MachineCombinerPattern::GATHER_LANE_i16:
7359 |      | -  case AArch64MachineCombinerPattern::GATHER_LANE_i8:
7360 | 7355 |     return true;
7361 | 7356 |   } // end switch (Pattern)
7362 | 7357 |   return false;
@@ -7397,252 +7392,11 @@ static bool getMiscPatterns(MachineInstr &Root,
7397 | 7392 |   return false;
7398 | 7393 | }
7399 | 7394 |
7400 |      | -static bool getGatherPattern(MachineInstr &Root,
7401 |      | -                             SmallVectorImpl<unsigned> &Patterns,
7402 |      | -                             unsigned LoadLaneOpCode, unsigned NumLanes) {
7403 |      | -  const MachineFunction *MF = Root.getMF();
7404 |      | -
7405 |      | -  // Early exit if optimizing for size.
7406 |      | -  if (MF->getFunction().hasMinSize())
7407 |      | -    return false;
7408 |      | -
7409 |      | -  const MachineRegisterInfo &MRI = MF->getRegInfo();
7410 |      | -  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
7411 |      | -
7412 |      | -  // The root of the pattern must load into the last lane of the vector.
7413 |      | -  if (Root.getOperand(2).getImm() != NumLanes - 1)
7414 |      | -    return false;
7415 |      | -
7416 |      | -  // Check that we have load into all lanes except lane 0.
7417 |      | -  // For each load we also want to check that:
7418 |      | -  // 1. It has a single non-debug use (since we will be replacing the virtual
7419 |      | -  // register)
7420 |      | -  // 2. That the addressing mode only uses a single offset register.
7421 |      | -  auto *CurrInstr = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
7422 |      | -  auto Range = llvm::seq<unsigned>(1, NumLanes - 1);
7423 |      | -  SmallSet<unsigned, 4> RemainingLanes(Range.begin(), Range.end());
7424 |      | -  while (!RemainingLanes.empty() && CurrInstr &&
7425 |      | -         CurrInstr->getOpcode() == LoadLaneOpCode &&
7426 |      | -         MRI.hasOneNonDBGUse(CurrInstr->getOperand(0).getReg()) &&
7427 |      | -         CurrInstr->getNumOperands() == 4) {
7428 |      | -    RemainingLanes.erase(CurrInstr->getOperand(2).getImm());
7429 |      | -    CurrInstr = MRI.getUniqueVRegDef(CurrInstr->getOperand(1).getReg());
7430 |      | -  }
7431 |      | -
7432 |      | -  if (!RemainingLanes.empty())
7433 |      | -    return false;
7434 |      | -
7435 |      | -  // Match the SUBREG_TO_REG sequence.
7436 |      | -  if (CurrInstr->getOpcode() != TargetOpcode::SUBREG_TO_REG)
7437 |      | -    return false;
7438 |      | -
7439 |      | -  // Verify that the subreg to reg loads an integer into the first lane.
7440 |      | -  auto Lane0LoadReg = CurrInstr->getOperand(2).getReg();
7441 |      | -  unsigned SingleLaneSizeInBits = 128 / NumLanes;
7442 |      | -  if (TRI->getRegSizeInBits(Lane0LoadReg, MRI) != SingleLaneSizeInBits)
7443 |      | -    return false;
7444 |      | -
7445 |      | -  // Verify that it also has a single non debug use.
7446 |      | -  if (!MRI.hasOneNonDBGUse(Lane0LoadReg))
7447 |      | -    return false;
7448 |      | -
7449 |      | -  switch (NumLanes) {
7450 |      | -  case 4:
7451 |      | -    Patterns.push_back(AArch64MachineCombinerPattern::GATHER_LANE_i32);
7452 |      | -    break;
7453 |      | -  case 8:
7454 |      | -    Patterns.push_back(AArch64MachineCombinerPattern::GATHER_LANE_i16);
7455 |      | -    break;
7456 |      | -  case 16:
7457 |      | -    Patterns.push_back(AArch64MachineCombinerPattern::GATHER_LANE_i8);
7458 |      | -    break;
7459 |      | -  default:
7460 |      | -    llvm_unreachable("Got bad number of lanes for gather pattern.");
7461 |      | -  }
7462 |      | -
7463 |      | -  return true;
7464 |      | -}
7465 |      | -
7466 |      | -/// Search for patterns where we use LD1 instructions to load into
7467 |      | -/// separate lanes of an 128 bit Neon register. We can increase Memory Level
7468 |      | -/// Parallelism by loading into 2 Neon registers instead.
7469 |      | -static bool getLoadPatterns(MachineInstr &Root,
7470 |      | -                            SmallVectorImpl<unsigned> &Patterns) {
7471 |      | -
7472 |      | -  // The pattern searches for loads into single lanes.
7473 |      | -  switch (Root.getOpcode()) {
7474 |      | -  case AArch64::LD1i32:
7475 |      | -    return getGatherPattern(Root, Patterns, Root.getOpcode(), 4);
7476 |      | -  case AArch64::LD1i16:
7477 |      | -    return getGatherPattern(Root, Patterns, Root.getOpcode(), 8);
7478 |      | -  case AArch64::LD1i8:
7479 |      | -    return getGatherPattern(Root, Patterns, Root.getOpcode(), 16);
7480 |      | -  default:
7481 |      | -    return false;
7482 |      | -  }
7483 |      | -}
7484 |      | -
7485 |      | -static void
7486 |      | -generateGatherPattern(MachineInstr &Root,
7487 |      | -                      SmallVectorImpl<MachineInstr *> &InsInstrs,
7488 |      | -                      SmallVectorImpl<MachineInstr *> &DelInstrs,
7489 |      | -                      DenseMap<Register, unsigned> &InstrIdxForVirtReg,
7490 |      | -                      unsigned Pattern, unsigned NumLanes) {
7491 |      | -
7492 |      | -  MachineFunction &MF = *Root.getParent()->getParent();
7493 |      | -  MachineRegisterInfo &MRI = MF.getRegInfo();
7494 |      | -  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
7495 |      | -
7496 |      | -  // Gather the initial load instructions to build the pattern
7497 |      | -  SmallVector<MachineInstr *, 16> LoadToLaneInstrs;
7498 |      | -  MachineInstr *CurrInstr = &Root;
7499 |      | -  for (unsigned i = 0; i < NumLanes - 1; ++i) {
7500 |      | -    LoadToLaneInstrs.push_back(CurrInstr);
7501 |      | -    CurrInstr = MRI.getUniqueVRegDef(CurrInstr->getOperand(1).getReg());
7502 |      | -  }
7503 |      | -
7504 |      | -  // Sort the load instructions according to the lane.
7505 |      | -  llvm::sort(LoadToLaneInstrs,
7506 |      | -             [](const MachineInstr *A, const MachineInstr *B) {
7507 |      | -               return A->getOperand(2).getImm() > B->getOperand(2).getImm();
7508 |      | -             });
7509 |      | -
7510 |      | -  MachineInstr *SubregToReg = CurrInstr;
7511 |      | -  LoadToLaneInstrs.push_back(
7512 |      | -      MRI.getUniqueVRegDef(SubregToReg->getOperand(2).getReg()));
7513 |      | -  auto LoadToLaneInstrsAscending = llvm::reverse(LoadToLaneInstrs);
7514 |      | -
7515 |      | -  const TargetRegisterClass *FPR128RegClass =
7516 |      | -      MRI.getRegClass(Root.getOperand(0).getReg());
7517 |      | -
7518 |      | -  auto LoadLaneToRegister = [&](MachineInstr *OriginalInstr,
7519 |      | -                                Register SrcRegister, unsigned Lane,
7520 |      | -                                Register OffsetRegister) {
7521 |      | -    auto NewRegister = MRI.createVirtualRegister(FPR128RegClass);
7522 |      | -    MachineInstrBuilder LoadIndexIntoRegister =
7523 |      | -        BuildMI(MF, MIMetadata(*OriginalInstr), TII->get(Root.getOpcode()),
7524 |      | -                NewRegister)
7525 |      | -            .addReg(SrcRegister)
7526 |      | -            .addImm(Lane)
7527 |      | -            .addReg(OffsetRegister, getKillRegState(true));
7528 |      | -    InstrIdxForVirtReg.insert(std::make_pair(NewRegister, InsInstrs.size()));
7529 |      | -    InsInstrs.push_back(LoadIndexIntoRegister);
7530 |      | -    return NewRegister;
7531 |      | -  };
7532 |      | -
7533 |      | -  // Helper to create load instruction based on opcode
7534 |      | -  auto CreateLoadInstruction = [&](unsigned NumLanes, Register DestReg,
7535 |      | -                                   Register OffsetReg) -> MachineInstrBuilder {
7536 |      | -    unsigned Opcode;
7537 |      | -    switch (NumLanes) {
7538 |      | -    case 4:
7539 |      | -      Opcode = AArch64::LDRSui;
7540 |      | -      break;
7541 |      | -    case 8:
7542 |      | -      Opcode = AArch64::LDRHui;
7543 |      | -      break;
7544 |      | -    case 16:
7545 |      | -      Opcode = AArch64::LDRBui;
7546 |      | -      break;
7547 |      | -    default:
7548 |      | -      llvm_unreachable(
7549 |      | -          "Got unsupported number of lanes in machine-combiner gather pattern");
7550 |      | -    }
7551 |      | -    // Immediate offset load
7552 |      | -    return BuildMI(MF, MIMetadata(Root), TII->get(Opcode), DestReg)
7553 |      | -        .addReg(OffsetReg)
7554 |      | -        .addImm(0); // immediate offset
7555 |      | -  };
7556 |      | -
7557 |      | -  // Load the remaining lanes into register 0.
7558 |      | -  auto LanesToLoadToReg0 =
7559 |      | -      llvm::make_range(LoadToLaneInstrsAscending.begin() + 1,
7560 |      | -                       LoadToLaneInstrsAscending.begin() + NumLanes / 2);
7561 |      | -  auto PrevReg = SubregToReg->getOperand(0).getReg();
7562 |      | -  for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg0)) {
7563 |      | -    PrevReg = LoadLaneToRegister(LoadInstr, PrevReg, Index + 1,
7564 |      | -                                 LoadInstr->getOperand(3).getReg());
7565 |      | -    DelInstrs.push_back(LoadInstr);
7566 |      | -  }
7567 |      | -  auto LastLoadReg0 = PrevReg;
7568 |      | -
7569 |      | -  // First load into register 1. Perform a LDRSui to zero out the upper lanes in
7570 |      | -  // a single instruction.
7571 |      | -  auto Lane0Load = *LoadToLaneInstrsAscending.begin();
7572 |      | -  auto OriginalSplitLoad =
7573 |      | -      *std::next(LoadToLaneInstrsAscending.begin(), NumLanes / 2);
7574 |      | -  auto DestRegForMiddleIndex = MRI.createVirtualRegister(
7575 |      | -      MRI.getRegClass(Lane0Load->getOperand(0).getReg()));
7576 |      | -
7577 |      | -  MachineInstrBuilder MiddleIndexLoadInstr =
7578 |      | -      CreateLoadInstruction(NumLanes, DestRegForMiddleIndex,
7579 |      | -                            OriginalSplitLoad->getOperand(3).getReg());
7580 |      | -
7581 |      | -  InstrIdxForVirtReg.insert(
7582 |      | -      std::make_pair(DestRegForMiddleIndex, InsInstrs.size()));
7583 |      | -  InsInstrs.push_back(MiddleIndexLoadInstr);
7584 |      | -  DelInstrs.push_back(OriginalSplitLoad);
7585 |      | -
7586 |      | -  // Subreg To Reg instruction for register 1.
7587 |      | -  auto DestRegForSubregToReg = MRI.createVirtualRegister(FPR128RegClass);
7588 |      | -  unsigned SubregType;
7589 |      | -  switch (NumLanes) {
7590 |      | -  case 4:
7591 |      | -    SubregType = AArch64::ssub;
7592 |      | -    break;
7593 |      | -  case 8:
7594 |      | -    SubregType = AArch64::hsub;
7595 |      | -    break;
7596 |      | -  case 16:
7597 |      | -    SubregType = AArch64::bsub;
7598 |      | -    break;
7599 |      | -  default:
7600 |      | -    llvm_unreachable(
7601 |      | -        "Got invalid NumLanes for machine-combiner gather pattern");
7602 |      | -  }
7603 |      | -
7604 |      | -  auto SubRegToRegInstr =
7605 |      | -      BuildMI(MF, MIMetadata(Root), TII->get(SubregToReg->getOpcode()),
7606 |      | -              DestRegForSubregToReg)
7607 |      | -          .addImm(0)
7608 |      | -          .addReg(DestRegForMiddleIndex, getKillRegState(true))
7609 |      | -          .addImm(SubregType);
7610 |      | -  InstrIdxForVirtReg.insert(
7611 |      | -      std::make_pair(DestRegForSubregToReg, InsInstrs.size()));
7612 |      | -  InsInstrs.push_back(SubRegToRegInstr);
7613 |      | -
7614 |      | -  // Load remaining lanes into register 1.
7615 |      | -  auto LanesToLoadToReg1 =
7616 |      | -      llvm::make_range(LoadToLaneInstrsAscending.begin() + NumLanes / 2 + 1,
7617 |      | -                       LoadToLaneInstrsAscending.end());
7618 |      | -  PrevReg = SubRegToRegInstr->getOperand(0).getReg();
7619 |      | -  for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg1)) {
7620 |      | -    PrevReg = LoadLaneToRegister(LoadInstr, PrevReg, Index + 1,
7621 |      | -                                 LoadInstr->getOperand(3).getReg());
7622 |      | -    if (Index == NumLanes / 2 - 2) {
7623 |      | -      break;
7624 |      | -    }
7625 |      | -    DelInstrs.push_back(LoadInstr);
7626 |      | -  }
7627 |      | -  auto LastLoadReg1 = PrevReg;
7628 |      | -
7629 |      | -  // Create the final zip instruction to combine the results.
7630 |      | -  MachineInstrBuilder ZipInstr =
7631 |      | -      BuildMI(MF, MIMetadata(Root), TII->get(AArch64::ZIP1v2i64),
7632 |      | -              Root.getOperand(0).getReg())
7633 |      | -          .addReg(LastLoadReg0)
7634 |      | -          .addReg(LastLoadReg1);
7635 |      | -  InsInstrs.push_back(ZipInstr);
7636 |      | -}
7637 |      | -
7638 | 7395 | CombinerObjective
7639 | 7396 | AArch64InstrInfo::getCombinerObjective(unsigned Pattern) const {
7640 | 7397 |   switch (Pattern) {
7641 | 7398 |   case AArch64MachineCombinerPattern::SUBADD_OP1:
7642 | 7399 |   case AArch64MachineCombinerPattern::SUBADD_OP2:
7643 |      | -  case AArch64MachineCombinerPattern::GATHER_LANE_i32:
7644 |      | -  case AArch64MachineCombinerPattern::GATHER_LANE_i16:
7645 |      | -  case AArch64MachineCombinerPattern::GATHER_LANE_i8:
7646 | 7400 |     return CombinerObjective::MustReduceDepth;
7647 | 7401 |   default:
7648 | 7402 |     return TargetInstrInfo::getCombinerObjective(Pattern);
@@ -7672,10 +7426,6 @@ bool AArch64InstrInfo::getMachineCombinerPatterns(
7672 | 7426 |   if (getMiscPatterns(Root, Patterns))
7673 | 7427 |     return true;
7674 | 7428 |
7675 |      | -  // Load patterns
7676 |      | -  if (getLoadPatterns(Root, Patterns))
7677 |      | -    return true;
7678 |      | -
7679 | 7429 |   return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
7680 | 7430 |                                                      DoRegPressureReduce);
7681 | 7431 | }
@@ -8931,21 +8681,6 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
8931 | 8681 |     MUL = genFNegatedMAD(MF, MRI, TII, Root, InsInstrs);
8932 | 8682 |     break;
8933 | 8683 |   }
8934 |      | -  case AArch64MachineCombinerPattern::GATHER_LANE_i32: {
8935 |      | -    generateGatherPattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
8936 |      | -                          Pattern, 4);
8937 |      | -    break;
8938 |      | -  }
8939 |      | -  case AArch64MachineCombinerPattern::GATHER_LANE_i16: {
8940 |      | -    generateGatherPattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
8941 |      | -                          Pattern, 8);
8942 |      | -    break;
8943 |      | -  }
8944 |      | -  case AArch64MachineCombinerPattern::GATHER_LANE_i8: {
8945 |      | -    generateGatherPattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
8946 |      | -                          Pattern, 16);
8947 |      | -    break;
8948 |      | -  }
8949 | 8684 |
8950 | 8685 |   } // end switch (Pattern)
8951 | 8686 |   // Record MUL and ADD/SUB for deletion
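For reference, the combine being reverted (getLoadPatterns / generateGatherPattern above) rewrote a serial chain of LD1 lane loads into two independent half-vector chains joined by a final zip1, so the loads can issue in parallel. A minimal before/after sketch of the four-lane LD1i32 case; the pointer registers x0-x3 and vector registers v0/v1 are illustrative assumptions, not taken from the commit:

    // Before: each ld1 reads the previous v0, so the four loads form
    // one serial dependency chain.
    ldr  s0, [x0]               // lane 0 (LDRSui + SUBREG_TO_REG)
    ld1  { v0.s }[1], [x1]      // LD1i32 into lane 1
    ld1  { v0.s }[2], [x2]      // LD1i32 into lane 2
    ld1  { v0.s }[3], [x3]      // LD1i32 into lane 3

    // After: lanes 0-1 and lanes 2-3 load independently, then merge.
    ldr  s0, [x0]               // chain 0, lane 0
    ld1  { v0.s }[1], [x1]      // chain 0, lane 1
    ldr  s1, [x2]               // chain 1, lane 0 (was lane 2)
    ld1  { v1.s }[1], [x3]      // chain 1, lane 1 (was lane 3)
    zip1 v0.2d, v0.2d, v1.2d    // ZIP1v2i64: concatenate the low halves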