Skip to content

Commit c1d1c72

Browse files
author
git apple-llvm automerger
committed
Merge commit '849038cad16f' from llvm.org/main into next
2 parents 3ceadc7 + 849038c commit c1d1c72

File tree

3 files changed

+128
-4
lines changed

3 files changed

+128
-4
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,10 @@ enum ImplicitArgumentPositions {
3838
#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,
3939

4040
enum ImplicitArgumentMask {
41-
NOT_IMPLICIT_INPUT = 0,
41+
UNKNOWN_INTRINSIC = 0,
4242
#include "AMDGPUAttributes.def"
43-
ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1
43+
ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1,
44+
NOT_IMPLICIT_INPUT
4445
};
4546

4647
#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
@@ -115,7 +116,7 @@ intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
115116
NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
116117
return QUEUE_PTR;
117118
default:
118-
return NOT_IMPLICIT_INPUT;
119+
return UNKNOWN_INTRINSIC;
119120
}
120121
}
121122

@@ -534,6 +535,21 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
534535
ImplicitArgumentMask AttrMask =
535536
intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
536537
HasApertureRegs, SupportsGetDoorbellID, COV);
538+
539+
if (AttrMask == UNKNOWN_INTRINSIC) {
540+
// Assume intrinsics not marked nocallback may invoke a function which
541+
// accesses implicit arguments.
542+
//
543+
// FIXME: This isn't really the correct check. We want to ensure it
544+
// isn't calling any function that may use implicit arguments regardless
545+
// of whether it's internal to the module or not.
546+
//
547+
// TODO: Callsite attributes are ignored; only the callee's are checked.
548+
if (!Callee->hasFnAttribute(Attribute::NoCallback))
549+
return indicatePessimisticFixpoint();
550+
continue;
551+
}
552+
537553
if (AttrMask != NOT_IMPLICIT_INPUT) {
538554
if ((IsNonEntryFunc || !NonKernelOnly))
539555
removeAssumedBits(AttrMask);
@@ -1357,7 +1373,10 @@ struct AAAMDGPUMinAGPRAlloc
13571373
default:
13581374
// Some intrinsics may use AGPRs, but if we have a choice, we are not
13591375
// required to use AGPRs.
1360-
return true;
1376+
1377+
// Assume !nocallback intrinsics may call a function which requires
1378+
// AGPRs.
1379+
return CB.hasFnAttr(Attribute::NoCallback);
13611380
}
13621381

13631382
// TODO: Handle callsite attributes
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
2+
; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -passes=amdgpu-attributor %s | FileCheck %s
3+
4+
; Make sure we do not infer anything about implicit inputs through an
5+
; intrinsic call which is not nocallback.
6+
7+
declare zeroext i32 @return_i32()
8+
9+
define i32 @test_i32_return() gc "statepoint-example" {
10+
; CHECK-LABEL: define i32 @test_i32_return(
11+
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] gc "statepoint-example" {
12+
; CHECK-NEXT: [[ENTRY:.*:]]
13+
; CHECK-NEXT: [[SAFEPOINT_TOKEN:%.*]] = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(i32 ()) @return_i32, i32 0, i32 0, i32 0, i32 0)
14+
; CHECK-NEXT: [[CALL1:%.*]] = call zeroext i32 @llvm.experimental.gc.result.i32(token [[SAFEPOINT_TOKEN]])
15+
; CHECK-NEXT: ret i32 [[CALL1]]
16+
;
17+
entry:
18+
%safepoint_token = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(i32 ()) @return_i32, i32 0, i32 0, i32 0, i32 0)
19+
%call1 = call zeroext i32 @llvm.experimental.gc.result.i32(token %safepoint_token)
20+
ret i32 %call1
21+
}
22+
23+
declare token @llvm.experimental.gc.statepoint.p0(i64 immarg, i32 immarg, ptr, i32 immarg, i32 immarg, ...)
24+
declare i32 @llvm.experimental.gc.result.i32(token) #0
25+
26+
attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) }
27+
;.
28+
; CHECK: attributes #[[ATTR0]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
29+
; CHECK: attributes #[[ATTR1:[0-9]+]] = { "target-cpu"="gfx90a" }
30+
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
31+
;.
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
2+
; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -passes=amdgpu-attributor -mcpu=gfx90a %s | FileCheck %s
3+
4+
; Make sure we infer no inputs are used through some intrinsics
5+
6+
define void @use_fake_use(i32 %arg) {
7+
; CHECK-LABEL: define void @use_fake_use(
8+
; CHECK-SAME: i32 [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
9+
; CHECK-NEXT: call void (...) @llvm.fake.use(i32 [[ARG]])
10+
; CHECK-NEXT: ret void
11+
;
12+
call void (...) @llvm.fake.use(i32 %arg)
13+
ret void
14+
}
15+
16+
define void @use_donothing() {
17+
; CHECK-LABEL: define void @use_donothing(
18+
; CHECK-SAME: ) #[[ATTR0]] {
19+
; CHECK-NEXT: call void @llvm.donothing()
20+
; CHECK-NEXT: ret void
21+
;
22+
call void @llvm.donothing()
23+
ret void
24+
}
25+
26+
define void @use_assume(i1 %arg) {
27+
; CHECK-LABEL: define void @use_assume(
28+
; CHECK-SAME: i1 [[ARG:%.*]]) #[[ATTR0]] {
29+
; CHECK-NEXT: call void @llvm.assume(i1 [[ARG]])
30+
; CHECK-NEXT: ret void
31+
;
32+
call void @llvm.assume(i1 %arg)
33+
ret void
34+
}
35+
36+
define void @use_trap() {
37+
; CHECK-LABEL: define void @use_trap(
38+
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
39+
; CHECK-NEXT: call void @llvm.trap()
40+
; CHECK-NEXT: ret void
41+
;
42+
call void @llvm.trap()
43+
ret void
44+
}
45+
46+
define void @use_debugtrap() {
47+
; CHECK-LABEL: define void @use_debugtrap(
48+
; CHECK-SAME: ) #[[ATTR1]] {
49+
; CHECK-NEXT: call void @llvm.debugtrap()
50+
; CHECK-NEXT: ret void
51+
;
52+
call void @llvm.debugtrap()
53+
ret void
54+
}
55+
56+
define void @use_ubsantrap() {
57+
; CHECK-LABEL: define void @use_ubsantrap(
58+
; CHECK-SAME: ) #[[ATTR1]] {
59+
; CHECK-NEXT: call void @llvm.ubsantrap(i8 0)
60+
; CHECK-NEXT: ret void
61+
;
62+
call void @llvm.ubsantrap(i8 0)
63+
ret void
64+
}
65+
66+
;.
67+
; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
68+
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
69+
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) "target-cpu"="gfx90a" }
70+
; CHECK: attributes #[[ATTR3:[0-9]+]] = { nounwind "target-cpu"="gfx90a" }
71+
; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
72+
; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) "target-cpu"="gfx90a" }
73+
; CHECK: attributes #[[ATTR6:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) "target-cpu"="gfx90a" }
74+
;.

0 commit comments

Comments
 (0)