@@ -96,6 +96,9 @@ namespace
9696 void generalGroupI8Stream (
9797 LLVMContext& C, uint32_t NumI8, uint32_t Align,
9898 uint32_t & NumI32, Type** Vecs, uint32_t & L, uint32_t BaseTypeSize);
99+ // Support functions for replaceCountTheLeadingZeros: per-element lowering to llvm.ctlz.i32
100+ Value* evaluateCtlzUpto32bit (IGCLLVM::IRBuilder<>* Builder, Value* inVal, Type* singleElementType, Value* canBePoison);
101+ Value* evaluateCtlz64bit (IGCLLVM::IRBuilder<>* Builder, Value* inVal, Type* singleElementType, Value* canBePoison);
99102
100103 // / replace member function
101104 void replaceMemcpy (IntrinsicInst* I);
@@ -104,6 +107,7 @@ namespace
104107 void replaceExpect (IntrinsicInst* I);
105108 void replaceFunnelShift (IntrinsicInst* I);
106109 void replaceLRound (IntrinsicInst* I);
110+ void replaceCountTheLeadingZeros (IntrinsicInst* I);
107111
108112 static const std::map< Intrinsic::ID, MemFuncPtr_t > m_intrinsicToFunc;
109113 };
@@ -129,7 +133,8 @@ const std::map< Intrinsic::ID, ReplaceUnsupportedIntrinsics::MemFuncPtr_t > Repl
129133 { Intrinsic::memmove, &ReplaceUnsupportedIntrinsics::replaceMemMove },
130134 { Intrinsic::expect, &ReplaceUnsupportedIntrinsics::replaceExpect },
131135 { Intrinsic::lround, &ReplaceUnsupportedIntrinsics::replaceLRound },
132- { Intrinsic::llround, &ReplaceUnsupportedIntrinsics::replaceLRound }
136+ { Intrinsic::llround, &ReplaceUnsupportedIntrinsics::replaceLRound },
137+ { Intrinsic::ctlz, &ReplaceUnsupportedIntrinsics::replaceCountTheLeadingZeros }
133138};
134139
135140ReplaceUnsupportedIntrinsics::ReplaceUnsupportedIntrinsics () : FunctionPass(ID)
@@ -985,6 +990,113 @@ void ReplaceUnsupportedIntrinsics::replaceLRound(IntrinsicInst* I) {
985990 I->eraseFromParent ();
986991}
987992
993+ /*
994+ Replaces llvm.ctlz.* intrinsics (count the leading zeros)
995+ to llvm.ctlz.i32 because we support llvm.ctlz intrinsic
996+ only with source type i32.
997+
998+ E.g.
999+ %1 = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %0, i1 false)
1000+ ret <2 x i8> %1
1001+ =>
1002+ %1 = extractelement <2 x i8> %0, i64 0
1003+ %2 = zext i8 %1 to i32
1004+ %3 = call i32 @llvm.ctlz.i32(i32 %2, i1 false)
1005+ %4 = trunc i32 %3 to i8
1006+ %5 = add nsw i8 %4, -24
1007+ %6 = insertelement <2 x i8> undef, i8 %5, i32 0
1008+ %7 = extractelement <2 x i8> %0, i64 1
1009+ %8 = zext i8 %7 to i32
1010+ %9 = call i32 @llvm.ctlz.i32(i32 %8, i1 false)
1011+ %10 = trunc i32 %9 to i8
1012+ %11 = add nsw i8 %10, -24
1013+ %12 = insertelement <2 x i8> %6, i8 %11, i32 1
1014+ %13 = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %0, i1 false)
1015+ ret <2 x i8> %12
1016+ */
1017+ void ReplaceUnsupportedIntrinsics::replaceCountTheLeadingZeros (IntrinsicInst* I) {
1018+ IGC_ASSERT (I->getIntrinsicID () == Intrinsic::ctlz);
1019+
1020+ Type* oldIntrinsicDstType = I->getType ();
1021+ Type* singleElementType = oldIntrinsicDstType;
1022+ uint32_t numOfElements = 1 ;
1023+ bool isVector = oldIntrinsicDstType->isVectorTy ();
1024+
1025+ if (isVector)
1026+ {
1027+ auto oldIntrinsicDstTypeFVT = dyn_cast<IGCLLVM::FixedVectorType>(oldIntrinsicDstType);
1028+ numOfElements = (uint32_t )oldIntrinsicDstTypeFVT->getNumElements ();
1029+ singleElementType = oldIntrinsicDstTypeFVT->getElementType ();
1030+ }
1031+
1032+ int singleElementSizeInBits = singleElementType->getScalarSizeInBits ();
1033+
1034+ IGC_ASSERT_MESSAGE (singleElementSizeInBits == 8 || singleElementSizeInBits == 16 ||
1035+ singleElementSizeInBits == 32 || singleElementSizeInBits == 64 ,
1036+ " Currently for Intrinsic::ctlz we support source bit size: 8,16,32,64" );
1037+
1038+ // noting to replace, early return
1039+ if (!isVector && singleElementSizeInBits == 32 ) return ;
1040+
1041+ bool bitSizeLowerThan32 = singleElementSizeInBits < 32 ;
1042+ bool bitSizeEqual64 = singleElementSizeInBits == 64 ;
1043+
1044+ IGCLLVM::IRBuilder<> Builder (I);
1045+
1046+ Value* inputVal = I->getArgOperand (0 );
1047+ Value* canBePoison = I->getArgOperand (1 );
1048+ Value* outputVal = llvm::UndefValue::get (oldIntrinsicDstType); // Will be overwritten in scalar case.
1049+ Value* retVal = inputVal;
1050+
1051+ for (uint32_t i = 0 ; i < numOfElements; i++)
1052+ {
1053+ if (isVector) retVal = Builder.CreateExtractElement (inputVal, i);
1054+
1055+ if (bitSizeLowerThan32)
1056+ retVal = evaluateCtlzUpto32bit (&Builder, retVal, singleElementType, canBePoison);
1057+ else if (bitSizeEqual64)
1058+ retVal = evaluateCtlz64bit (&Builder, retVal, singleElementType, canBePoison);
1059+
1060+ if (singleElementSizeInBits == 32 )
1061+ retVal = Builder.CreateIntrinsic (Intrinsic::ctlz, { Builder.getInt32Ty () }, { retVal, canBePoison });
1062+
1063+ if (isVector)
1064+ outputVal = Builder.CreateInsertElement (outputVal, retVal, Builder.getInt32 (i));
1065+ else // for scalar type
1066+ outputVal = retVal;
1067+ }
1068+ I->replaceAllUsesWith (outputVal);
1069+ }
1070+
1071+ Value* ReplaceUnsupportedIntrinsics::evaluateCtlzUpto32bit (IGCLLVM::IRBuilder<>* Builder, Value* inVal, Type* singleElementType, Value* canBePoison) {
1072+ int sizeInBits = singleElementType->getScalarSizeInBits ();
1073+ Value* retVal = Builder->CreateZExt (inVal, Builder->getInt32Ty ());
1074+ retVal = Builder->CreateIntrinsic (Intrinsic::ctlz, { Builder->getInt32Ty () }, { retVal, canBePoison });
1075+ retVal = Builder->CreateTrunc (retVal, singleElementType);
1076+ auto constInt = Builder->getIntN (sizeInBits, sizeInBits - 32 );
1077+ retVal = Builder->CreateNSWAdd (retVal, constInt);
1078+ return retVal;
1079+ }
1080+
1081+ Value* ReplaceUnsupportedIntrinsics::evaluateCtlz64bit (IGCLLVM::IRBuilder<>* Builder, Value* inVal, Type* singleElementType, Value* canBePoison) {
1082+ Value* lowBits = Builder->CreateTrunc (inVal, Builder->getInt32Ty ());
1083+ lowBits = Builder->CreateIntrinsic (Intrinsic::ctlz, { Builder->getInt32Ty () }, { lowBits, canBePoison });
1084+
1085+ Value* hiBits = Builder->CreateLShr (inVal, 32 );
1086+ hiBits = Builder->CreateTrunc (hiBits, Builder->getInt32Ty ());
1087+ hiBits = Builder->CreateIntrinsic (Intrinsic::ctlz, { Builder->getInt32Ty () }, { hiBits, canBePoison });
1088+
1089+ auto maxValueIn32BitsPlusOne = Builder->getInt64 ((uint64_t )(0xffffffff ) + 1 ); // maxValueIn32Bits + 1
1090+ Value* cmp = Builder->CreateICmp (CmpInst::Predicate::ICMP_ULT, inVal, maxValueIn32BitsPlusOne);
1091+
1092+ auto constInt = Builder->getInt32 (32 );
1093+ lowBits = Builder->CreateAdd (lowBits, constInt);
1094+
1095+ Value* retVal = Builder->CreateSelect (cmp, lowBits, hiBits);
1096+ retVal = Builder->CreateZExt (retVal, singleElementType);
1097+ return retVal;
1098+ }
1099+
9881100void ReplaceUnsupportedIntrinsics::visitIntrinsicInst (IntrinsicInst& I) {
9891101 if (m_intrinsicToFunc.find (I.getIntrinsicID ()) != m_intrinsicToFunc.end ()) {
9901102 m_instsToReplace.push_back (&I);
0 commit comments