Skip to content

Commit 3e2f549

Browse files
kkreczkoigcbot
authored and committed
Changes to MismatchDetected
MismatchDetected wasn't detecting a type size mismatch in this case:
```llvm
%0 = alloca [2 x double]
%1 = getelementptr inbounds [2 x double], ptr %0, i64 0, i64 0
%2 = load <2 x i32>, ptr %1
```
It was comparing the number of bits allocated for the load instruction's type (<2 x i32>) with the number of bits allocated for the alloca's scalar type (double), so the size mismatch went undetected because 64 == 64. I've changed the approach to use the LLVM Type method getScalarSizeInBits() to compare scalar sizes, similarly to what was done in the typed pointers path (see SOALayoutChecker::visitBitCastInst). Also refactored the control flow.
1 parent e4b64c1 commit 3e2f549

File tree

2 files changed

+19
-32
lines changed

2 files changed

+19
-32
lines changed

IGC/Compiler/CISACodeGen/LowerGEPForPrivMem.cpp

Lines changed: 13 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -558,9 +558,6 @@ bool SOALayoutChecker::visitIntrinsicInst(IntrinsicInst &II) {
558558
// Return true to disable SOA promotion.
559559
bool IGC::SOALayoutChecker::MismatchDetected(Instruction &I) {
560560

561-
if (!isa<LoadInst>(I) && !isa<StoreInst>(I))
562-
return false;
563-
564561
// Skip when we see an i8-based GEP with a non-constant (dynamic) byte offset. The legacy (old) algorithm assumes byte
565562
// offsets map exactly to whole promoted elements (e.g. multiples of the lane size) and cannot safely reconstruct
566563
// sub‑element (inter-lane or unaligned) accesses. Using it would risk incorrect indexing. The new byte-precise
@@ -576,44 +573,28 @@ bool IGC::SOALayoutChecker::MismatchDetected(Instruction &I) {
576573
}
577574
}
578575

579-
// Apply the following mismatch checks only with opaque pointers.
580-
if (!IGC::AreOpaquePointersEnabled())
581-
return false;
582-
583-
if (!pInfo->baseType)
584-
return false;
585-
586576
Type *allocaTy = allocaRef.getAllocatedType();
587577
bool allocaIsVecOrArr = allocaTy->isVectorTy() || allocaTy->isArrayTy();
588-
589578
if (!allocaIsVecOrArr)
590579
return false;
591580

592-
auto DL = I.getParent()->getParent()->getParent()->getDataLayout();
593-
594-
Type *pUserTy = I.getType();
581+
if (auto *pgep = dyn_cast<GetElementPtrInst>(parentLevelInst))
582+
allocaTy = pgep->getResultElementType();
583+
if (auto *arrTy = dyn_cast<ArrayType>(allocaTy))
584+
allocaTy = arrTy->getElementType();
585+
if (auto *vec = dyn_cast<IGCLLVM::FixedVectorType>(allocaTy))
586+
allocaTy = vec->getElementType();
595587

588+
Type *pUserTy = nullptr;
596589
if (auto *storeInst = dyn_cast<StoreInst>(&I))
597590
pUserTy = storeInst->getValueOperand()->getType();
591+
else if (auto *loadInst = dyn_cast<LoadInst>(&I))
592+
pUserTy = loadInst->getType();
593+
else
594+
return false;
598595

599-
if (auto *pgep = dyn_cast<GetElementPtrInst>(parentLevelInst)) {
600-
allocaTy = pgep->getResultElementType();
601-
} else {
602-
if (auto *arrTy = dyn_cast<ArrayType>(allocaTy)) {
603-
allocaTy = arrTy->getElementType();
604-
} else if (auto *vec = dyn_cast<IGCLLVM::FixedVectorType>(allocaTy)) {
605-
allocaTy = vec->getElementType();
606-
}
607-
608-
if (auto *arrTy = dyn_cast<ArrayType>(pUserTy)) {
609-
pUserTy = arrTy->getElementType();
610-
} else if (auto *vec = dyn_cast<IGCLLVM::FixedVectorType>(pUserTy)) {
611-
pUserTy = vec->getElementType();
612-
}
613-
}
614-
615-
auto allocaSize = DL.getTypeAllocSize(allocaTy);
616-
auto vecTySize = DL.getTypeAllocSize(pUserTy);
596+
auto allocaSize = allocaTy->getScalarSizeInBits();
597+
auto vecTySize = pUserTy->getScalarSizeInBits();
617598

618599
if (vecTySize != allocaSize) {
619600
pInfo->canUseSOALayout = false;

IGC/Compiler/tests/PrivateMemoryResolution/SOA_promotion/soa-mismatch-detection.ll

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,12 @@ exit:
4545
%arr3 = alloca [512 x i32]
4646
%load2 = load i8, ptr %arr3
4747

48+
; This case tests that a load whose total size matches the size of the alloca's scalar type, but whose own scalar size differs, isn't marked as eligible for SOA layout
49+
; CHECK: [[LOAD:%.*]] = load <2 x i32>, ptr {{.*}}, align 4
50+
%arr4 = alloca [2 x double], align 8
51+
%gep4 = getelementptr inbounds [2 x double], ptr %arr4, i64 0, i64 0
52+
%load3 = load <2 x i32>, ptr %gep4, align 4
53+
4854
; Case Alloca->Store->Gep->Store: This case is not valid due to different sizes
4955

5056
; CHECK: store <4 x i32> zeroinitializer, ptr {{.*}}

0 commit comments

Comments
 (0)