Skip to content

Commit 13389e6

Browse files
stefan-il authored and igcbot committed
Make MergeAllocas handle different sized arrays
Merge differently sized non-overlapping array allocas.
1 parent 4a4d978 commit 13389e6

File tree

4 files changed

+144
-14
lines changed

4 files changed

+144
-14
lines changed

IGC/AdaptorCommon/RayTracing/MergeAllocas.cpp

Lines changed: 32 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ SPDX-License-Identifier: MIT
1818
#include <llvm/Analysis/LoopInfo.h>
1919
#include <llvm/IR/Constants.h>
2020
#include <llvm/IR/Dominators.h>
21+
#include <llvm/IR/IRBuilder.h>
2122
#include <llvm/IR/InstIterator.h>
2223
#include <llvm/IR/Instructions.h>
2324
#include "common/LLVMWarningsPop.hpp"
@@ -332,7 +333,7 @@ void MergeAllocas::getAnalysisUsage(llvm::AnalysisUsage& AU) const
332333

333334
bool MergeAllocas::runOnFunction(Function& F)
334335
{
335-
if (F.hasOptNone()){
336+
if (skipFunction(F)){
336337
return false;
337338
}
338339

@@ -341,7 +342,7 @@ bool MergeAllocas::runOnFunction(Function& F)
341342
// we group the allocations by type, then sort them into buckets with nonoverlapping liveranges
342343
// can this be generalized into allocas for types of the same size, not only types?
343344
using BucketT = SmallVector<std::pair<Instruction*, AllocationBasedLivenessAnalysis::LivenessData*>>;
344-
DenseMap<std::tuple<llvm::Type*, uint64_t, uint32_t>, SmallVector<BucketT>> buckets;
345+
DenseMap<std::tuple<llvm::Type*, uint32_t, uint32_t>, SmallVector<BucketT>> buckets;
345346

346347
for (const auto& A : ABLA)
347348
{
@@ -353,9 +354,11 @@ bool MergeAllocas::runOnFunction(Function& F)
353354
if (!isa<ConstantInt>(AI->getArraySize()))
354355
continue;
355356

357+
auto* AllocatedType = AI->getAllocatedType();
358+
uint32_t IsArray = AllocatedType->isArrayTy()? 1 : 0;
356359
auto& perTypeBuckets = buckets[std::make_tuple(
357-
AI->getAllocatedType(),
358-
cast<ConstantInt>(AI->getArraySize())->getZExtValue(),
360+
(IsArray)? AllocatedType->getArrayElementType() : AllocatedType,
361+
IsArray,
359362
AI->getAddressSpace()
360363
)];
361364

@@ -379,7 +382,7 @@ bool MergeAllocas::runOnFunction(Function& F)
379382

380383
bool changed = false;
381384

382-
for (const auto& [_, perTypeBuckets] : buckets)
385+
for (const auto& [allocaType, perTypeBuckets] : buckets)
383386
{
384387
for (const auto& bucket : perTypeBuckets)
385388
{
@@ -388,21 +391,36 @@ bool MergeAllocas::runOnFunction(Function& F)
388391
continue;
389392
}
390393

394+
bool IsArray = std::get<1>(allocaType);
391395
Instruction* firstAlloca = nullptr;
396+
if (IsArray)
397+
{
398+
firstAlloca = std::max_element(bucket.begin(), bucket.end(), [](const auto& a, const auto& b) {
399+
return cast<AllocaInst>(a.first)->getAllocatedType()->getArrayNumElements() < cast<AllocaInst>(b.first)->getAllocatedType()->getArrayNumElements();
400+
})->first;
401+
}
402+
else
403+
{
404+
firstAlloca = bucket[0].first;
405+
}
406+
firstAlloca->moveBefore(F.getEntryBlock().getFirstNonPHI());
407+
firstAlloca->setName(VALUE_NAME("MergedAlloca"));
392408
for (const auto& [I, _] : bucket)
393409
{
394-
if (!firstAlloca)
410+
if (firstAlloca == I)
395411
{
396-
firstAlloca = I;
397-
firstAlloca->moveBefore(F.getEntryBlock().getFirstNonPHI());
398-
firstAlloca->setName(VALUE_NAME("MergedAlloca"));
412+
continue;
399413
}
400-
else
414+
auto* ReplacementValue = firstAlloca;
415+
if (firstAlloca->getType() != I->getType())
401416
{
402-
I->replaceAllUsesWith(firstAlloca);
403-
I->eraseFromParent();
404-
}
405-
417+
IRBuilder<> Builder(firstAlloca->getParent());
418+
Builder.SetInsertPoint(firstAlloca->getNextNode());
419+
auto *CastedValue = llvm::cast<Instruction>(Builder.CreateBitCast(firstAlloca, I->getType()));
420+
ReplacementValue = CastedValue;
421+
};
422+
I->replaceAllUsesWith(ReplacementValue);
423+
I->eraseFromParent();
406424
}
407425

408426
changed = true;
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2025 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
; RUN: igc_opt --igc-merge-allocas --igc-private-mem-resolution -S %s --platformpvc | FileCheck %s
9+
; ------------------------------------------------
10+
; PrivateMemoryResolution
11+
; ------------------------------------------------
12+
13+
; Check that array allocas are not merged with regular allocas.
14+
15+
define spir_kernel void @main(float addrspace(1)* %0, i64 %1, i64 %2, i32 %3, i32 %4) {
16+
; CHECK-LABEL: main
17+
; CHECK-NEXT: alloca [128 x float], align 4
18+
; CHECK-NEXT: alloca float, align 4
19+
%6 = alloca [128 x float], align 4
20+
%7 = alloca float, align 4
21+
br label %8
22+
23+
8:
24+
%9 = icmp ult i32 %3, %4
25+
br i1 %9, label %10, label %12
26+
27+
10:
28+
%11 = getelementptr inbounds [128 x float], [128 x float]* %6, i64 0, i64 %1
29+
br label %14
30+
31+
12:
32+
%13 = load float, float* %7, align 4
33+
br label %14
34+
35+
14:
36+
ret void
37+
}
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2025 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
; RUN: igc_opt --typed-pointers --igc-merge-allocas --igc-private-mem-resolution -S %s --platformpvc | FileCheck %s
9+
; ------------------------------------------------
10+
; PrivateMemoryResolution
11+
; ------------------------------------------------
12+
13+
; Check that allocas are merged and that only the largest array is kept.
14+
15+
define spir_kernel void @main(float addrspace(1)* %0, i64 %1, i64 %2, i32 %3, i32 %4) {
16+
; CHECK-LABEL: main
17+
; CHECK-NEXT: alloca [256 x float], align 4
18+
; CHECK-NEXT: bitcast [256 x float]* {{.*}} to [128 x float]*
19+
; CHECK-NOT: alloca [128 x float], align 4
20+
%6 = alloca [128 x float], align 4
21+
%7 = alloca [256 x float], align 4
22+
br label %8
23+
24+
8:
25+
%9 = icmp ult i32 %3, %4
26+
br i1 %9, label %10, label %12
27+
28+
10:
29+
%11 = getelementptr inbounds [128 x float], [128 x float]* %6, i64 0, i64 %1
30+
br label %14
31+
32+
12:
33+
%13 = getelementptr inbounds [256 x float], [256 x float]* %7, i64 0, i64 %2
34+
br label %14
35+
36+
14:
37+
ret void
38+
}
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2025 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
; RUN: igc_opt --typed-pointers --igc-merge-allocas --igc-private-mem-resolution -S %s --platformpvc | FileCheck %s
9+
; ------------------------------------------------
10+
; PrivateMemoryResolution
11+
; ------------------------------------------------
12+
13+
; Check that allocas are not merged if array elements have different types.
14+
15+
define spir_kernel void @main(float addrspace(1)* %0, i64 %1, i64 %2, i32 %3, i32 %4) {
16+
; CHECK-LABEL: main
17+
; CHECK-NEXT: alloca [128 x float], align 4
18+
; CHECK-NEXT: alloca [256 x i64], align 4
19+
%6 = alloca [128 x float], align 4
20+
%7 = alloca [256 x i64], align 4
21+
br label %8
22+
23+
8:
24+
%9 = icmp ult i32 %3, %4
25+
br i1 %9, label %10, label %12
26+
27+
10:
28+
%11 = getelementptr inbounds [128 x float], [128 x float]* %6, i64 0, i64 %1
29+
br label %14
30+
31+
12:
32+
%13 = getelementptr inbounds [256 x i64], [256 x i64]* %7, i64 0, i64 %2
33+
br label %14
34+
35+
14:
36+
ret void
37+
}

0 commit comments

Comments
 (0)