Skip to content

Commit 849038c

Browse files
authored
AMDGPU: Do not infer implicit inputs for !nocallback intrinsics
(#131759) This isn't really the right check: what we want to know is that the intrinsic does not perform a true function call to any code (in the module or not). nocallback appears to be the closest thing to this property we have now, though. Fixes theoretical miscompiles with intrinsics like statepoint, which hide a call to a real function. Also do the same for inferring no-AGPR usage.
1 parent 6111ff1 commit 849038c

File tree

3 files changed

+128
-4
lines changed

3 files changed

+128
-4
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,10 @@ enum ImplicitArgumentPositions {
3838
#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,
3939

4040
enum ImplicitArgumentMask {
41-
NOT_IMPLICIT_INPUT = 0,
41+
UNKNOWN_INTRINSIC = 0,
4242
#include "AMDGPUAttributes.def"
43-
ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1
43+
ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1,
44+
NOT_IMPLICIT_INPUT
4445
};
4546

4647
#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
@@ -115,7 +116,7 @@ intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
115116
NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
116117
return QUEUE_PTR;
117118
default:
118-
return NOT_IMPLICIT_INPUT;
119+
return UNKNOWN_INTRINSIC;
119120
}
120121
}
121122

@@ -534,6 +535,21 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
534535
ImplicitArgumentMask AttrMask =
535536
intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
536537
HasApertureRegs, SupportsGetDoorbellID, COV);
538+
539+
if (AttrMask == UNKNOWN_INTRINSIC) {
540+
// Assume not-nocallback intrinsics may invoke a function which accesses
541+
// implicit arguments.
542+
//
543+
// FIXME: This isn't really the correct check. We want to ensure it
544+
// isn't calling any function that may use implicit arguments regardless
545+
// of whether it's internal to the module or not.
546+
//
547+
// TODO: Ignoring callsite attributes.
548+
if (!Callee->hasFnAttribute(Attribute::NoCallback))
549+
return indicatePessimisticFixpoint();
550+
continue;
551+
}
552+
537553
if (AttrMask != NOT_IMPLICIT_INPUT) {
538554
if ((IsNonEntryFunc || !NonKernelOnly))
539555
removeAssumedBits(AttrMask);
@@ -1357,7 +1373,10 @@ struct AAAMDGPUMinAGPRAlloc
13571373
default:
13581374
// Some intrinsics may use AGPRs, but if we have a choice, we are not
13591375
// required to use AGPRs.
1360-
return true;
1376+
1377+
// Assume !nocallback intrinsics may call a function which requires
1378+
// AGPRs.
1379+
return CB.hasFnAttr(Attribute::NoCallback);
13611380
}
13621381

13631382
// TODO: Handle callsite attributes
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
2+
; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -passes=amdgpu-attributor %s | FileCheck %s
3+
4+
; Make sure we do not infer anything about implicit inputs through an
5+
; intrinsic call which is not nocallback.
6+
7+
declare zeroext i32 @return_i32()
8+
9+
define i32 @test_i32_return() gc "statepoint-example" {
10+
; CHECK-LABEL: define i32 @test_i32_return(
11+
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] gc "statepoint-example" {
12+
; CHECK-NEXT: [[ENTRY:.*:]]
13+
; CHECK-NEXT: [[SAFEPOINT_TOKEN:%.*]] = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(i32 ()) @return_i32, i32 0, i32 0, i32 0, i32 0)
14+
; CHECK-NEXT: [[CALL1:%.*]] = call zeroext i32 @llvm.experimental.gc.result.i32(token [[SAFEPOINT_TOKEN]])
15+
; CHECK-NEXT: ret i32 [[CALL1]]
16+
;
17+
entry:
18+
%safepoint_token = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(i32 ()) @return_i32, i32 0, i32 0, i32 0, i32 0)
19+
%call1 = call zeroext i32 @llvm.experimental.gc.result.i32(token %safepoint_token)
20+
ret i32 %call1
21+
}
22+
23+
declare token @llvm.experimental.gc.statepoint.p0(i64 immarg, i32 immarg, ptr, i32 immarg, i32 immarg, ...)
24+
declare i32 @llvm.experimental.gc.result.i32(token) #0
25+
26+
attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) }
27+
;.
28+
; CHECK: attributes #[[ATTR0]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
29+
; CHECK: attributes #[[ATTR1:[0-9]+]] = { "target-cpu"="gfx90a" }
30+
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
31+
;.
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
2+
; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -passes=amdgpu-attributor -mcpu=gfx90a %s | FileCheck %s
3+
4+
; Make sure we infer no inputs are used through some intrinsics
5+
6+
define void @use_fake_use(i32 %arg) {
7+
; CHECK-LABEL: define void @use_fake_use(
8+
; CHECK-SAME: i32 [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
9+
; CHECK-NEXT: call void (...) @llvm.fake.use(i32 [[ARG]])
10+
; CHECK-NEXT: ret void
11+
;
12+
call void (...) @llvm.fake.use(i32 %arg)
13+
ret void
14+
}
15+
16+
define void @use_donothing() {
17+
; CHECK-LABEL: define void @use_donothing(
18+
; CHECK-SAME: ) #[[ATTR0]] {
19+
; CHECK-NEXT: call void @llvm.donothing()
20+
; CHECK-NEXT: ret void
21+
;
22+
call void @llvm.donothing()
23+
ret void
24+
}
25+
26+
define void @use_assume(i1 %arg) {
27+
; CHECK-LABEL: define void @use_assume(
28+
; CHECK-SAME: i1 [[ARG:%.*]]) #[[ATTR0]] {
29+
; CHECK-NEXT: call void @llvm.assume(i1 [[ARG]])
30+
; CHECK-NEXT: ret void
31+
;
32+
call void @llvm.assume(i1 %arg)
33+
ret void
34+
}
35+
36+
define void @use_trap() {
37+
; CHECK-LABEL: define void @use_trap(
38+
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
39+
; CHECK-NEXT: call void @llvm.trap()
40+
; CHECK-NEXT: ret void
41+
;
42+
call void @llvm.trap()
43+
ret void
44+
}
45+
46+
define void @use_debugtrap() {
47+
; CHECK-LABEL: define void @use_debugtrap(
48+
; CHECK-SAME: ) #[[ATTR1]] {
49+
; CHECK-NEXT: call void @llvm.debugtrap()
50+
; CHECK-NEXT: ret void
51+
;
52+
call void @llvm.debugtrap()
53+
ret void
54+
}
55+
56+
define void @use_ubsantrap() {
57+
; CHECK-LABEL: define void @use_ubsantrap(
58+
; CHECK-SAME: ) #[[ATTR1]] {
59+
; CHECK-NEXT: call void @llvm.ubsantrap(i8 0)
60+
; CHECK-NEXT: ret void
61+
;
62+
call void @llvm.ubsantrap(i8 0)
63+
ret void
64+
}
65+
66+
;.
67+
; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
68+
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
69+
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) "target-cpu"="gfx90a" }
70+
; CHECK: attributes #[[ATTR3:[0-9]+]] = { nounwind "target-cpu"="gfx90a" }
71+
; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
72+
; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) "target-cpu"="gfx90a" }
73+
; CHECK: attributes #[[ATTR6:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) "target-cpu"="gfx90a" }
74+
;.

0 commit comments

Comments
 (0)