Skip to content

Commit 4ea71e1

Browse files
committed
[AMDGPU] Preserve noundef and range during kernel argument loads
This commit ensures than noundef (which is frequently a prerequisite for other annotations) and range() annotations on kernel arguments are copied onto their corresponding load from the kernel argument structure.
1 parent 3d43789 commit 4ea71e1

File tree

2 files changed

+46
-14
lines changed

2 files changed

+46
-14
lines changed

llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "GCNSubtarget.h"
1616
#include "llvm/Analysis/ValueTracking.h"
1717
#include "llvm/CodeGen/TargetPassConfig.h"
18+
#include "llvm/IR/Attributes.h"
1819
#include "llvm/IR/IRBuilder.h"
1920
#include "llvm/IR/IntrinsicsAMDGPU.h"
2021
#include "llvm/IR/MDBuilder.h"
@@ -416,6 +417,16 @@ static bool lowerKernelArguments(Function &F, const TargetMachine &TM) {
416417

417418
MDBuilder MDB(Ctx);
418419

420+
if (Arg.hasAttribute(Attribute::NoUndef))
421+
Load->setMetadata(LLVMContext::MD_noundef, MDNode::get(Ctx, {}));
422+
423+
if (Arg.hasAttribute(Attribute::Range)) {
424+
const ConstantRange &Range =
425+
Arg.getAttribute(Attribute::Range).getValueAsConstantRange();
426+
Load->setMetadata(LLVMContext::MD_range,
427+
MDB.createRange(Range.getLower(), Range.getUpper()));
428+
}
429+
419430
if (isa<PointerType>(ArgTy)) {
420431
if (Arg.hasNonNullAttr())
421432
Load->setMetadata(LLVMContext::MD_nonnull, MDNode::get(Ctx, {}));

llvm/test/CodeGen/AMDGPU/lower-kernargs.ll

Lines changed: 35 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,25 @@ define amdgpu_kernel void @kern_i32(i32 %arg0) {
255255
ret void
256256
}
257257

258+
define amdgpu_kernel void @kern_range_noundef_i32(i32 noundef range(i32 0, 8) %arg0) {
259+
; HSA-LABEL: @kern_range_noundef_i32(
260+
; HSA-NEXT: [[KERN_RANGE_NOUNDEF_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
261+
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_RANGE_NOUNDEF_I32_KERNARG_SEGMENT]], i64 0
262+
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !range [[RNG2:![0-9]+]], !invariant.load [[META1]], !noundef [[META1]]
263+
; HSA-NEXT: store volatile i32 [[ARG0_LOAD]], ptr addrspace(1) poison, align 4
264+
; HSA-NEXT: ret void
265+
;
266+
; MESA-LABEL: @kern_range_noundef_i32(
267+
; MESA-NEXT: [[KERN_RANGE_NOUNDEF_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
268+
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_RANGE_NOUNDEF_I32_KERNARG_SEGMENT]], i64 36
269+
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !range [[RNG2:![0-9]+]], !invariant.load [[META1]], !noundef [[META1]]
270+
; MESA-NEXT: store volatile i32 [[ARG0_LOAD]], ptr addrspace(1) poison, align 4
271+
; MESA-NEXT: ret void
272+
;
273+
store volatile i32 %arg0, ptr addrspace(1) poison
274+
ret void
275+
}
276+
258277
define amdgpu_kernel void @kern_f32(float %arg0) {
259278
; HSA-LABEL: @kern_f32(
260279
; HSA-NEXT: [[KERN_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
@@ -1022,14 +1041,14 @@ define amdgpu_kernel void @kern_global_ptr_dereferencable(ptr addrspace(1) deref
10221041
; HSA-LABEL: @kern_global_ptr_dereferencable(
10231042
; HSA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
10241043
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT]], i64 0
1025-
; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !dereferenceable [[META2:![0-9]+]]
1044+
; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !dereferenceable [[META3:![0-9]+]]
10261045
; HSA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
10271046
; HSA-NEXT: ret void
10281047
;
10291048
; MESA-LABEL: @kern_global_ptr_dereferencable(
10301049
; MESA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
10311050
; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT]], i64 36
1032-
; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !dereferenceable [[META2:![0-9]+]]
1051+
; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !dereferenceable [[META3:![0-9]+]]
10331052
; MESA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
10341053
; MESA-NEXT: ret void
10351054
;
@@ -1041,14 +1060,14 @@ define amdgpu_kernel void @kern_global_ptr_dereferencable_or_null(ptr addrspace(
10411060
; HSA-LABEL: @kern_global_ptr_dereferencable_or_null(
10421061
; HSA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
10431062
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT]], i64 0
1044-
; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !dereferenceable_or_null [[META3:![0-9]+]]
1063+
; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !dereferenceable_or_null [[META4:![0-9]+]]
10451064
; HSA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
10461065
; HSA-NEXT: ret void
10471066
;
10481067
; MESA-LABEL: @kern_global_ptr_dereferencable_or_null(
10491068
; MESA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
10501069
; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT]], i64 36
1051-
; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !dereferenceable_or_null [[META3:![0-9]+]]
1070+
; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !dereferenceable_or_null [[META4:![0-9]+]]
10521071
; MESA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
10531072
; MESA-NEXT: ret void
10541073
;
@@ -1079,14 +1098,14 @@ define amdgpu_kernel void @kern_align32_global_ptr(ptr addrspace(1) align 1024 %
10791098
; HSA-LABEL: @kern_align32_global_ptr(
10801099
; HSA-NEXT: [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
10811100
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0
1082-
; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !align [[META4:![0-9]+]]
1101+
; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !align [[META5:![0-9]+]]
10831102
; HSA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
10841103
; HSA-NEXT: ret void
10851104
;
10861105
; MESA-LABEL: @kern_align32_global_ptr(
10871106
; MESA-NEXT: [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
10881107
; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36
1089-
; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !align [[META4:![0-9]+]]
1108+
; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !align [[META5:![0-9]+]]
10901109
; MESA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
10911110
; MESA-NEXT: ret void
10921111
;
@@ -1120,14 +1139,14 @@ define amdgpu_kernel void @kern_noundef_global_ptr(ptr addrspace(1) noundef %ptr
11201139
; HSA-LABEL: @kern_noundef_global_ptr(
11211140
; HSA-NEXT: [[KERN_NOUNDEF_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
11221141
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_NOUNDEF_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0
1123-
; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
1142+
; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !noundef [[META1]]
11241143
; HSA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) null, align 8
11251144
; HSA-NEXT: ret void
11261145
;
11271146
; MESA-LABEL: @kern_noundef_global_ptr(
11281147
; MESA-NEXT: [[KERN_NOUNDEF_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
11291148
; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_NOUNDEF_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36
1130-
; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
1149+
; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !noundef [[META1]]
11311150
; MESA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) null, align 8
11321151
; MESA-NEXT: ret void
11331152
;
@@ -1729,13 +1748,15 @@ attributes #2 = { nounwind "target-cpu"="tahiti" }
17291748
;.
17301749
; HSA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
17311750
; HSA: [[META1]] = !{}
1732-
; HSA: [[META2]] = !{i64 42}
1733-
; HSA: [[META3]] = !{i64 128}
1734-
; HSA: [[META4]] = !{i64 1024}
1751+
; HSA: [[RNG2]] = !{i32 0, i32 8}
1752+
; HSA: [[META3]] = !{i64 42}
1753+
; HSA: [[META4]] = !{i64 128}
1754+
; HSA: [[META5]] = !{i64 1024}
17351755
;.
17361756
; MESA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
17371757
; MESA: [[META1]] = !{}
1738-
; MESA: [[META2]] = !{i64 42}
1739-
; MESA: [[META3]] = !{i64 128}
1740-
; MESA: [[META4]] = !{i64 1024}
1758+
; MESA: [[RNG2]] = !{i32 0, i32 8}
1759+
; MESA: [[META3]] = !{i64 42}
1760+
; MESA: [[META4]] = !{i64 128}
1761+
; MESA: [[META5]] = !{i64 1024}
17411762
;.

0 commit comments

Comments
 (0)