@@ -2705,6 +2705,76 @@ void GenSpecificPattern::visitBitCastInst(BitCastInst& I)
27052705 }
27062706}
27072707
2708+ /*
2709+ Matches a pattern where pointer to load instruction is fetched by other load instruction.
2710+ On targets that do not support 64 bit operations, Emu64OpsPass will insert pair_to_ptr intrinsic
2711+ between the loads and InstructionCombining will not optimize this case.
2712+
2713+ This function changes following pattern:
2714+ %3 = load <2 x i32>, <2 x i32> addrspace(1)* %2, align 64
2715+ %4 = extractelement <2 x i32> %3, i32 0
2716+ %5 = extractelement <2 x i32> %3, i32 1
2717+ %6 = call %union._XReq addrspace(1)* addrspace(1)* addrspace(1)* addrspace(1)* addrspace(1)* addrspace(1)* addrspace(1)* addrspace(1)* @llvm.genx.GenISA.pair.to.ptr.p1p1p1p1p1p1p1p1union._XReq(i32 %4, i32 %5)
2718+ %7 = bitcast %union._XReq addrspace(1)* addrspace(1)* addrspace(1)* addrspace(1)* addrspace(1)* addrspace(1)* addrspace(1)* addrspace(1)* %6 to i64 addrspace(1)*
2719+ %8 = bitcast i64 addrspace(1)* %7 to <2 x i32> addrspace(1)*
2720+ %9 = load <2 x i32>, <2 x i32> addrspace(1)* %8, align 64
2721+
2722+ to:
2723+ %3 = bitcast <2 x i32> addrspace(1)* %2 to <2 x i32> addrspace(1)* addrspace(1)*
2724+ %4 = load <2 x i32> addrspace(1)*, <2 x i32> addrspace(1)* addrspace(1)* %3, align 64
2725+ ... dead code
2726+ %11 = load <2 x i32>, <2 x i32> addrspace(1)* %4, align 64
2727+ */
2728+ void GenSpecificPattern::visitLoadInst (LoadInst &LI) {
2729+ Value* PO = LI.getPointerOperand ();
2730+ std::vector<Value*> OneUseValues = { PO };
2731+ while (isa<BitCastInst>(PO)) {
2732+ PO = cast<BitCastInst>(PO)->getOperand (0 );
2733+ OneUseValues.push_back (PO);
2734+ }
2735+
2736+ bool IsPairToPtrInst = (isa<GenIntrinsicInst>(PO) &&
2737+ cast<GenIntrinsicInst>(PO)->getIntrinsicID () ==
2738+ GenISAIntrinsic::GenISA_pair_to_ptr);
2739+
2740+ if (!IsPairToPtrInst)
2741+ return ;
2742+
2743+ // check if this pointer comes from a load.
2744+ auto CallInst = cast<GenIntrinsicInst>(PO);
2745+ auto Op0 = dyn_cast<ExtractElementInst>(CallInst->getArgOperand (0 ));
2746+ auto Op1 = dyn_cast<ExtractElementInst>(CallInst->getArgOperand (1 ));
2747+ bool PointerComesFromALoad = (Op0 && Op1 && isa<ConstantInt>(Op0->getIndexOperand ()) &&
2748+ isa<ConstantInt>(Op1->getIndexOperand ()) &&
2749+ cast<ConstantInt>(Op0->getIndexOperand ())->getZExtValue () == 0 &&
2750+ cast<ConstantInt>(Op1->getIndexOperand ())->getZExtValue () == 1 &&
2751+ isa<LoadInst>(Op0->getVectorOperand ()) &&
2752+ isa<LoadInst>(Op1->getVectorOperand ()) &&
2753+ Op0->getVectorOperand () == Op1->getVectorOperand ());
2754+
2755+ if (!PointerComesFromALoad)
2756+ return ;
2757+
2758+ OneUseValues.insert (OneUseValues.end (), { Op0, Op1 });
2759+
2760+ if (!std::all_of (OneUseValues.begin (), OneUseValues.end (), [](auto v) { return v->hasOneUse (); }))
2761+ return ;
2762+
2763+ auto VectorLoadInst = cast<LoadInst>(Op0->getVectorOperand ());
2764+ if (VectorLoadInst->getNumUses () != 2 )
2765+ return ;
2766+
2767+ auto PointerOperand = VectorLoadInst->getPointerOperand ();
2768+ PointerType* newLoadPointerType = PointerType::get (
2769+ LI.getPointerOperand ()->getType (), PointerOperand->getType ()->getPointerAddressSpace ());
2770+ IRBuilder<> builder (VectorLoadInst);
2771+ auto CastedPointer =
2772+ builder.CreateBitCast (PointerOperand, newLoadPointerType);
2773+ auto NewLoadInst = IGC::cloneLoad (VectorLoadInst, CastedPointer);
2774+
2775+ LI.setOperand (0 , NewLoadInst);
2776+ }
2777+
27082778void GenSpecificPattern::visitZExtInst (ZExtInst& ZEI)
27092779{
27102780 CmpInst* Cmp = dyn_cast<CmpInst>(ZEI.getOperand (0 ));
0 commit comments