@@ -35,7 +35,6 @@ IN THE SOFTWARE.
3535#include " GenXTargetMachine.h"
3636#include " GenXUtil.h"
3737#include " GenXVisa.h"
38- #include " vc/GenXCodeGen/GenXInternalMetadata.h"
3938
4039#include " Probe/Assertion.h"
4140#include " llvmWrapper/IR/DerivedTypes.h"
@@ -209,7 +208,6 @@ std::pair<Value *, unsigned>
209208GenXThreadPrivateMemory::NormalizeVector (Value *From, Type *To,
210209 Instruction *Inst) {
211210 Type *I32Ty = Type::getInt32Ty (Inst->getContext ());
212- Type *I64Ty = Type::getInt64Ty (Inst->getContext ());
213211 Value *Res = From;
214212 Type *FromTy = From->getType ();
215213 IGC_ASSERT (isa<VectorType>(FromTy));
@@ -236,22 +234,22 @@ GenXThreadPrivateMemory::NormalizeVector(Value *From, Type *To,
236234 To = IGCLLVM::FixedVectorType::get (I32Ty, NumElts);
237235 EltSz = I32Ty->getPrimitiveSizeInBits () / genx::ByteBits;
238236 Res = CastInst::Create (Instruction::BitCast, Res, To, " " , Inst);
239- } else if (m_DL->getTypeSizeInBits (cast<VectorType>(To)->getElementType ()) <
240- genx::DWordBits) {
237+ } else if (cast<VectorType>(To)->getElementType ()->getPrimitiveSizeInBits () <
238+ genx::DWordBits
239+ // this is required for correct generation of svm.gather/scatter
240+ // of data of type which size is < i32 because these intrinsics
241+ // infer their block size from the type of the data they handle
242+ && !m_useGlobalMem) {
241243 To = IGCLLVM::FixedVectorType::get (I32Ty, NumElts);
242- Res = CastInst::CreateZExtOrBitCast (From, To, " " , Inst);
243- } else if (!m_useGlobalMem &&
244- m_DL->getTypeSizeInBits (cast<VectorType>(To)->getElementType ()) ==
245- genx::QWordBits) {
246- if (From->getType ()->getScalarType ()->isPointerTy ()) {
247- auto *NewType = IGCLLVM::FixedVectorType::get (I64Ty, NumElts);
248- From = CastInst::Create (CastInst::PtrToInt, From, NewType, " " , Inst);
249- }
244+
245+ Res = CastInst::Create (Instruction::ZExt, From, To, " " , Inst);
246+ } else if (cast<VectorType>(To)->getElementType ()->getPrimitiveSizeInBits () ==
247+ genx::QWordBits) {
250248 NumElts *= 2 ;
251249 EltSz = I32Ty->getPrimitiveSizeInBits () / genx::ByteBits;
252250 To = IGCLLVM::FixedVectorType::get (I32Ty, NumElts);
253251
254- Res = CastInst::CreateBitOrPointerCast ( From, To, " " , Inst);
252+ Res = CastInst::Create (Instruction::BitCast, From, To, " " , Inst);
255253 }
256254
257255 return std::make_pair (Res, EltSz);
@@ -260,8 +258,6 @@ GenXThreadPrivateMemory::NormalizeVector(Value *From, Type *To,
260258Instruction *
261259GenXThreadPrivateMemory::RestoreVectorAfterNormalization (Instruction *From,
262260 Type *To) {
263- if (From->getType () == To)
264- return From;
265261 Instruction *Restored = From;
266262 unsigned EltSz = m_DL->getTypeSizeInBits (To->getScalarType ());
267263 IGC_ASSERT (EltSz > 0 );
@@ -523,19 +519,35 @@ bool GenXThreadPrivateMemory::replaceLoad(LoadInst *LdI) {
523519 LdTy = IGCLLVM::FixedVectorType::get (LdTy, 1 );
524520
525521 unsigned NumEltsToLoad = cast<VectorType>(LdTy)->getNumElements ();
526- unsigned ValueEltSz = m_DL->getTypeSizeInBits (LdEltTy) / genx::ByteBits;
522+ unsigned LdEltTySz = m_DL->getTypeSizeInBits (LdEltTy);
523+ if (!(m_useGlobalMem && LdEltTy->isIntegerTy (64 )) &&
524+ LdEltTySz == genx::QWordBits)
525+ NumEltsToLoad *= 2 ;
527526
528527 Value *PredVal = ConstantInt::get (Type::getInt1Ty (*m_ctx), 1 );
529528 Value *Pred = Builder.CreateVectorSplat (NumEltsToLoad, PredVal);
530529
531530 Type *I32Ty = Type::getInt32Ty (*m_ctx);
532531 Type *I64Ty = Type::getInt64Ty (*m_ctx);
532+ Type *TyToLoad = (m_useGlobalMem && LdEltTy->isIntegerTy (64 )) ? I64Ty : I32Ty;
533+ if (LdEltTy->isFloatTy ())
534+ TyToLoad = LdEltTy;
535+ Type *RealTyToLoad = LdEltTy;
536+ if (!(m_useGlobalMem && LdEltTy->isIntegerTy (64 )) &&
537+ m_DL->getTypeSizeInBits (RealTyToLoad) == genx::QWordBits)
538+ RealTyToLoad = I32Ty;
539+ unsigned RealTyToLoadSz =
540+ m_DL->getTypeSizeInBits (RealTyToLoad) / genx::ByteBits;
541+ // we don't want to use improper block sizes for loads of i8/i16
542+ // to make sure we comply with alignment rules for gathers
543+ bool NoExtToDword =
544+ m_useGlobalMem &&
545+ !(LdI->getType ()->isAggregateType () || LdI->getType ()->isVectorTy ()) &&
546+ m_DL->getTypeSizeInBits (LdI->getType ()) < genx::DWordBits;
547+ if (NoExtToDword)
548+ TyToLoad = LdI->getType ();
533549 Value *OldValOfTheDataRead =
534- Builder.CreateVectorSplat (NumEltsToLoad, UndefValue::get (LdEltTy));
535- std::tie (OldValOfTheDataRead, ValueEltSz) =
536- NormalizeVector (OldValOfTheDataRead, LdTy, LdI);
537- NumEltsToLoad =
538- cast<VectorType>(OldValOfTheDataRead->getType ())->getNumElements ();
550+ Builder.CreateVectorSplat (NumEltsToLoad, UndefValue::get (TyToLoad));
539551
540552 Value *PointerOp = LdI->getPointerOperand ();
541553 Value *Offset = lookForPtrReplacement (PointerOp);
@@ -545,13 +557,10 @@ bool GenXThreadPrivateMemory::replaceLoad(LoadInst *LdI) {
545557 ? llvm::GenXIntrinsic::genx_svm_gather
546558 : llvm::GenXIntrinsic::genx_gather_scaled;
547559
548- Value *EltsOffset = FormEltsOffsetVector (NumEltsToLoad, ValueEltSz , LdI);
560+ Value *EltsOffset = FormEltsOffsetVector (NumEltsToLoad, RealTyToLoadSz , LdI);
549561
550- unsigned NumBlocks = m_DL->getTypeSizeInBits (LdEltTy) / genx::ByteBits;
551- // This logic is aligned with the on in CisaBuilder and GenXLowering
552- // The reason behind check for == 2 is that svm intrinsics don't support
553- // BlockSize of 2, so for ops with i16s we have to use BlockSize == 1 and NumBlocks == 2
554- Value *logNumBlocks = ConstantInt::get (I32Ty, genx::log2 (NumBlocks == 2 ? NumBlocks : 1 ));
562+ unsigned SrcSize = genx::log2 (RealTyToLoadSz);
563+ Value *logNumBlocks = ConstantInt::get (I32Ty, m_useGlobalMem ? 0 : SrcSize);
555564 Value *Scale = ConstantInt::get (Type::getInt16Ty (*m_ctx), 0 );
556565 Value *Surface = ConstantInt::get (I32Ty,
557566 visa::getReservedSurfaceIndex (m_stack));
@@ -592,10 +601,6 @@ bool GenXThreadPrivateMemory::replaceLoad(LoadInst *LdI) {
592601 ProperGather = LdVal;
593602 }
594603
595- Gather->setMetadata (InstMD::SVMBlockType,
596- MDNode::get (*m_ctx, llvm::ValueAsMetadata::get (
597- UndefValue::get (LdEltTy))));
598-
599604 LLVM_DEBUG (dbgs () << *Gather << " \n " );
600605 LdI->replaceAllUsesWith (ProperGather);
601606 LdI->eraseFromParent ();
@@ -642,9 +647,7 @@ bool GenXThreadPrivateMemory::replaceStore(StoreInst *StI) {
642647 {Pred->getType (),
643648 (m_useGlobalMem ? Offset : EltsOffset)->getType (),
644649 ValueOp->getType ()});
645- unsigned NumBlocks = m_DL->getTypeSizeInBits (ValueOpTy->getScalarType ()) / genx::ByteBits;
646- // see the comment in replaceLoad above
647- Value *logNumBlocks = ConstantInt::get (I32Ty, genx::log2 (NumBlocks == 2 ? NumBlocks : 1 ));
650+ Value *logNumBlocks = ConstantInt::get (I32Ty, m_useGlobalMem ? 0 : genx::log2 (ValueEltSz));
648651 Value *Scale = ConstantInt::get (Type::getInt16Ty (*m_ctx), 0 );
649652 Value *Surface = ConstantInt::get (I32Ty,
650653 visa::getReservedSurfaceIndex (m_stack));
@@ -659,11 +662,6 @@ bool GenXThreadPrivateMemory::replaceStore(StoreInst *StI) {
659662 Scatter->insertAfter (StI);
660663 StI->eraseFromParent ();
661664
662- Scatter->setMetadata (
663- InstMD::SVMBlockType,
664- MDNode::get (*m_ctx, llvm::ValueAsMetadata::get (
665- UndefValue::get (ValueOpTy->getScalarType ()))));
666-
667665 LLVM_DEBUG (dbgs () << *Scatter << " \n " );
668666 m_scatter.push_back (Scatter);
669667
@@ -1096,12 +1094,6 @@ void SplitScatter(CallInst *CI) {
10961094 }
10971095 IGC_ASSERT (FirstScatter && SecondScatter);
10981096
1099- auto *MD = CI->getMetadata (InstMD::SVMBlockType);
1100- if (MD) {
1101- FirstScatter->setMetadata (InstMD::SVMBlockType, MD);
1102- SecondScatter->setMetadata (InstMD::SVMBlockType, MD);
1103- }
1104-
11051097 FirstScatter->insertAfter (CI);
11061098 SecondScatter->insertAfter (FirstScatter);
11071099
@@ -1171,12 +1163,6 @@ void SplitGather(CallInst *CI) {
11711163 }
11721164 IGC_ASSERT (FirstGather && SecondGather);
11731165
1174- auto *MD = CI->getMetadata (InstMD::SVMBlockType);
1175- if (MD) {
1176- FirstGather->setMetadata (InstMD::SVMBlockType, MD);
1177- SecondGather->setMetadata (InstMD::SVMBlockType, MD);
1178- }
1179-
11801166 FirstGather->insertAfter (CI);
11811167 SecondGather->insertAfter (FirstGather);
11821168
@@ -1294,16 +1280,14 @@ bool GenXThreadPrivateMemory::runOnModule(Module &M) {
12941280 m_ST = &getAnalysis<TargetPassConfig>()
12951281 .getTM <GenXTargetMachine>()
12961282 .getGenXSubtarget ();
1297- if (!m_ST->isOCLRuntime ())
1298- m_useGlobalMem = false ;
12991283 for (auto &F : M)
13001284 visit (F);
1301- if (m_useGlobalMem ||
1302- (m_ST-> isOCLRuntime () && std::find_if (m_alloca.begin (), m_alloca.end (),
1303- SVMChecker ()) != m_alloca.end () )) {
1285+ if (! m_useGlobalMem &&
1286+ std::find_if (m_alloca.begin (), m_alloca.end (), SVMChecker ()) !=
1287+ m_alloca.end ()) {
13041288 LLVM_DEBUG (dbgs () << " Switching TPM to SVM\n " );
13051289 // TODO: move the name string to vc-intrinsics *MD::useGlobalMem
1306- M.addModuleFlag (Module::ModFlagBehavior::Error, ModuleMD::UseSVMStack , 1 );
1290+ M.addModuleFlag (Module::ModFlagBehavior::Error, " genx.useGlobalMem " , 1 );
13071291 m_useGlobalMem = true ;
13081292 }
13091293 bool Result = false ;
0 commit comments