Skip to content

Commit e980945

Browse files
committed
[NVPTX][InferAS] assume alloca instructions are in local AS
1 parent 3026eca commit e980945

File tree

3 files changed

+28
-1
lines changed

3 files changed

+28
-1
lines changed

llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "llvm/IR/Value.h"
2121
#include "llvm/Support/Casting.h"
2222
#include "llvm/Support/ErrorHandling.h"
23+
#include "llvm/Support/NVPTXAddrSpace.h"
2324
#include "llvm/Transforms/InstCombine/InstCombiner.h"
2425
#include <optional>
2526
using namespace llvm;
@@ -562,4 +563,11 @@ Value *NVPTXTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
562563
}
563564
}
564565
return nullptr;
565-
}
566+
}
567+
568+
/// Reports the address space that \p V can be assumed to live in.
///
/// An alloca is reported as ADDRESS_SPACE_LOCAL. Every other value yields -1,
/// which converts to the largest unsigned value on return.
/// NOTE(review): -1 is presumably the "no assumption" sentinel expected by the
/// caller -- confirm against the TargetTransformInfo documentation.
unsigned NVPTXTTIImpl::getAssumedAddrSpace(const Value *V) const {
569+
if (isa<AllocaInst>(V))
570+
return ADDRESS_SPACE_LOCAL;
571+
572+
return -1;
573+
}

llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,8 @@ class NVPTXTTIImpl : public BasicTTIImplBase<NVPTXTTIImpl> {
129129

130130
Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
131131
Value *NewV) const;
132+
133+
unsigned getAssumedAddrSpace(const Value *V) const;
132134
};
133135

134136
} // end namespace llvm
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S -passes=infer-address-spaces %s | FileCheck %s
3+
4+
target triple = "nvptx64-nvidia-cuda"
5+
6+
7+
; Loads a float through a freshly created alloca. The CHECK lines below expect
; infer-address-spaces to insert an addrspacecast of the alloca to
; addrspace(5) and to perform the load through that cast pointer.
define float @load_alloca() {
8+
; CHECK-LABEL: define float @load_alloca() {
9+
; CHECK-NEXT: [[ADDR:%.*]] = alloca float, align 4
10+
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(5)
11+
; CHECK-NEXT: [[VAL:%.*]] = load float, ptr addrspace(5) [[TMP1]], align 4
12+
; CHECK-NEXT: ret float [[VAL]]
13+
;
14+
%addr = alloca float
15+
%val = load float, ptr %addr
16+
ret float %val
17+
}

0 commit comments

Comments
 (0)