Skip to content

Commit 93f6411

Browse files
karolzwolak authored and igcbot committed
GenerateBlockMemOpsPass: Fix handling geps that don't match load/store result type
Prior to this change, these two geps were handled differently even though they point to the same address.

```llvm
%struct.foo = type { i32, i32, i32 }

; simdBlockWrite was incorrectly generated for this gep
; ptr to the whole struct
%mismatch = getelementptr %struct.foo, ptr addrspace(1) %data, i64 %idx
; but we store to the first field
store i32 0, ptr addrspace(1) %mismatch, align 4

; but here there was no simdBlockWrite generated
; ptr to the first field
%field = getelementptr %struct.foo, ptr addrspace(1) %data, i64 %idx, i32 0
store i32 0, ptr addrspace(1) %field, align 4
```
1 parent b42e4d2 commit 93f6411

File tree

3 files changed

+67
-11
lines changed

3 files changed

+67
-11
lines changed

IGC/Compiler/CISACodeGen/GenerateBlockMemOpsPass.cpp

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -647,7 +647,7 @@ bool GenerateBlockMemOpsPass::canOptLoadStore(Instruction *I) {
647647

648648
// Get the last index from the getelementptr instruction if it is not uniform in the subgroup.
649649
Instruction *PtrInstr = dyn_cast<Instruction>(Ptr);
650-
Value *Idx = checkGep(PtrInstr);
650+
Value *Idx = checkGep(PtrInstr, DataType);
651651

652652
if (!Idx)
653653
return false;
@@ -716,7 +716,7 @@ void GenerateBlockMemOpsPass::setAlignmentAttr(CallInst *CI, const unsigned &Ali
716716
CI->addFnAttr(CustomAttr);
717717
}
718718

719-
Value *GenerateBlockMemOpsPass::checkGep(Instruction *PtrInstr) {
719+
Value *GenerateBlockMemOpsPass::checkGep(Instruction *PtrInstr, Type *DataType) {
720720
if (!PtrInstr)
721721
return nullptr;
722722

@@ -755,19 +755,26 @@ Value *GenerateBlockMemOpsPass::checkGep(Instruction *PtrInstr) {
755755
if (WI->isUniform(Ptr))
756756
IsPtrUniform = true;
757757

758+
bool TypesMatch = DataType == Gep->getResultElementType();
759+
Type *Int32Ty = Type::getInt32Ty(*CGCtx->getLLVMContext());
760+
Value *Zero = Constant::getNullValue(Int32Ty);
761+
762+
// If `DataType` doesn't match the GEP result type -- then logically there are implicit zero indices at the end.
763+
// Here it doesn't matter how many zero indices there are.
764+
// If there's at least one implicit zero -- then we have to check all the indexes and the last index will be zero.
765+
auto E = TypesMatch ? Gep->idx_end() - 1 : Gep->idx_end();
766+
Value *LInst = TypesMatch ? *E : Zero;
758767
// Make sure that all indexes, not including the last one, are uniform.
759768
// This is important because the address must be continuous in the subgroup.
760-
for (auto Idx = Gep->idx_begin(), E = Gep->idx_end() - 1; Idx != E; Idx++)
769+
for (auto Idx = Gep->idx_begin(); Idx != E; Idx++)
761770
if (!WI->isUniform(*Idx))
762771
return nullptr;
763772

764-
auto LIndx = Gep->idx_end() - 1;
765-
766-
if (WI->isUniform(*LIndx))
773+
if (WI->isUniform(LInst))
767774
IsLastIndUniform = true;
768775

769776
if (!IsLastIndUniform && IsPtrUniform) {
770-
return *LIndx;
777+
return LInst;
771778
} else if (IsLastIndUniform && !IsPtrUniform) {
772779
if (!isa<PHINode>(Ptr) && !isa<GetElementPtrInst>(Ptr))
773780
return nullptr;
@@ -803,8 +810,8 @@ Value *GenerateBlockMemOpsPass::checkGep(Instruction *PtrInstr) {
803810
}
804811
}
805812

806-
return checkGep(dyn_cast<GetElementPtrInst>(Ptr));
813+
return checkGep(dyn_cast<GetElementPtrInst>(Ptr), DataType);
807814
}
808815

809816
return nullptr;
810-
}
817+
}

IGC/Compiler/CISACodeGen/GenerateBlockMemOpsPass.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ class GenerateBlockMemOpsPass : public llvm::FunctionPass {
3838
virtual bool runOnFunction(llvm::Function &F) override;
3939

4040
private:
41-
llvm::Value *checkGep(llvm::Instruction *Gep);
41+
llvm::Value *checkGep(llvm::Instruction *Gep, llvm::Type *DataType);
4242
bool isLocalIdX(const llvm::Value *InputVal);
4343
bool isR0(const llvm::Value *InputVal);
4444
bool isDataTypeSupported(llvm::Value *Ptr, llvm::Type *DataType);
@@ -60,4 +60,4 @@ class GenerateBlockMemOpsPass : public llvm::FunctionPass {
6060
llvm::ScalarEvolution *SE;
6161
size_t SimdSize = 0;
6262
};
63-
} // namespace IGC
63+
} // namespace IGC
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2025 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
9+
; REQUIRES: llvm-16-plus
10+
; RUN: igc_opt %s --opaque-pointers --platformpvc --generate-block-mem-ops -S --regkey EnableOpaquePointersBackend=1 | FileCheck %s
11+
; CHECK-NOT: call void @llvm.genx.GenISA.simdBlockWrite
12+
13+
; Make sure that the gep (arrayidx) whose result type (%struct.work_size_data) doesn't match the store type (i32)
14+
; behaves like the gep (arrayidx2) matching the type, and that neither generates a simdBlockWrite instruction.
15+
16+
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-n8:16:32"
17+
target triple = "spir64-unknown-unknown"
18+
19+
%struct.work_size_data = type { i32, i32, i32 }
20+
21+
define spir_kernel void @foo(ptr addrspace(1) %data, <8 x i32> %r0, <3 x i32> %globalOffset, <3 x i32> %globalSize, <3 x i32> %localSize, i16 %localIdX, i16 %localIdY, i16 %localIdZ, ptr %privateBase, i32 %bufferOffset) {
22+
entry:
23+
%idxprom = zext i16 %localIdX to i64
24+
%arrayidx = getelementptr %struct.work_size_data, ptr addrspace(1) %data, i64 %idxprom
25+
store i32 0, ptr addrspace(1) %arrayidx, align 4
26+
%arrayidx2 = getelementptr %struct.work_size_data, ptr addrspace(1) %data, i64 %idxprom, i32 0
27+
store i32 0, ptr addrspace(1) %arrayidx2, align 4
28+
ret void
29+
}
30+
31+
!igc.functions = !{!0}
32+
33+
!0 = !{ptr @foo, !1}
34+
!1 = !{!2, !3, !15}
35+
!2 = !{!"function_type", i32 0}
36+
!3 = !{!"implicit_arg_desc", !4, !5, !6, !7, !8, !9, !10, !11, !12, !13}
37+
!4 = !{i32 0}
38+
!5 = !{i32 2}
39+
!6 = !{i32 5}
40+
!7 = !{i32 6}
41+
!8 = !{i32 7}
42+
!9 = !{i32 8}
43+
!10 = !{i32 9}
44+
!11 = !{i32 10}
45+
!12 = !{i32 13}
46+
!13 = !{i32 15, !14}
47+
!14 = !{!"explicit_arg_num", i32 0}
48+
!15 = !{!"thread_group_size", i32 64, i32 1, i32 1}
49+

0 commit comments

Comments
 (0)