Skip to content

Commit 2c42b1b

Browse files
michalpaszkowski authored and igcbot committed
Skip SOA promotion for variable i8 GEPs
Add a bail out in SOALayoutChecker::MismatchDetected to treat i8-based GEPs with non-constant byte indices as a mismatch and disable SOA promotion for those allocas. This avoids incorrect results produced by the legacy scalarization path when byte-wise addressing is used and the offsets are not multiples of the lane size.
1 parent f5bd985 commit 2c42b1b

File tree

2 files changed

+67
-5
lines changed

2 files changed

+67
-5
lines changed

IGC/Compiler/CISACodeGen/LowerGEPForPrivMem.cpp

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -554,16 +554,29 @@ bool SOALayoutChecker::visitIntrinsicInst(IntrinsicInst &II) {
554554
return IID == llvm::Intrinsic::lifetime_start || IID == llvm::Intrinsic::lifetime_end;
555555
}
556556

557-
// Detection of mismatch between type sizes of
558-
// alloca -> load / store
559-
// or
560-
// alloca -> gep -> load / store
557+
// Detect size mismatches between an alloca's element and the corresponding load/store element (directly or via a GEP).
558+
// Return true to disable SOA promotion.
561559
bool IGC::SOALayoutChecker::MismatchDetected(Instruction &I) {
562560

563561
if (!isa<LoadInst>(I) && !isa<StoreInst>(I))
564562
return false;
565563

566-
// Only detect mismatch if are have opaque pointers (LLVM>=16)
564+
// Bail out (report a mismatch) when the alloca has an i8-based GEP user with a non-constant (dynamic) byte offset. The legacy algorithm assumes byte
565+
// offsets map exactly to whole promoted elements (e.g. multiples of the lane size) and cannot safely reconstruct
566+
// sub-element (inter-lane or unaligned) accesses. Using it would risk incorrect indexing. The new byte-precise
567+
// algorithm could handle this, but while it is disabled we treat such dynamic i8 GEPs as a mismatch and leave them
568+
// untouched.
569+
for (User *U : allocaRef.users()) {
570+
if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
571+
if (GEP->getSourceElementType()->isIntegerTy(8) && GEP->getNumOperands() > 1 &&
572+
!isa<ConstantInt>(GEP->getOperand(1))) {
573+
pInfo->canUseSOALayout = false;
574+
return true;
575+
}
576+
}
577+
}
578+
579+
// Apply the following mismatch checks only with opaque pointers.
567580
if (!IGC::AreOpaquePointersEnabled())
568581
return false;
569582

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2025 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
9+
; RUN: igc_opt --opaque-pointers -igc-priv-mem-to-reg -S < %s 2>&1 | FileCheck %s
10+
11+
; Verify that byte-wise (i8) GEPs with a non-constant index are NOT scalarized
12+
; by the legacy handleGEPInst path. The pass should skip transforming these to
13+
; avoid incorrect handling of inter-lane (unaligned) byte offsets.
14+
; The new algorithm (when enabled) can handle them, but is not used yet.
15+
16+
; CHECK-LABEL: @test(
17+
; CHECK: for.body:
18+
; CHECK: %idx = phi i64 [ 0, %entry ], [ %idx.next, %for.body ]
19+
; CHECK: %src.gep = getelementptr i8, ptr {{%.*}}, i64 %idx
20+
; CHECK: %dst.gep = getelementptr i8, ptr {{%.*}}, i64 %idx
21+
; CHECK: load i8, ptr %src.gep
22+
; CHECK: store i8
23+
; CHECK-NOT: insertelement
24+
; CHECK-NOT: extractelement
25+
26+
define spir_kernel void @test() {
27+
entry:
28+
%src = alloca [64 x i32], align 4
29+
%dst = alloca [64 x i32], align 4
30+
br label %for.body
31+
32+
for.body: ; preds = %entry, %for.body
33+
%idx = phi i64 [ 0, %entry ], [ %idx.next, %for.body ]
34+
%src.gep = getelementptr i8, ptr %src, i64 %idx
35+
%dst.gep = getelementptr i8, ptr %dst, i64 %idx
36+
%val = load i8, ptr %src.gep, align 1
37+
store i8 %val, ptr %dst.gep, align 1
38+
%idx.next = add nuw i64 %idx, 1
39+
%exitcond = icmp eq i64 %idx.next, 256 ; 256 bytes = 64 * 4
40+
br i1 %exitcond, label %exit, label %for.body
41+
42+
exit:
43+
ret void
44+
}
45+
46+
!igc.functions = !{!1}
47+
!1 = !{ptr @test, !2}
48+
!2 = !{!3}
49+
!3 = !{!"function_type", i32 0}

0 commit comments

Comments
 (0)