Skip to content

Commit 3b6aa65

Browse files
committed
AMDGPU: Account for read/write register intrinsics for AGPR usage
Fix the special-case intrinsics that can directly reference a physical register. There's no reason to use these.
1 parent 43894ee commit 3b6aa65

File tree

4 files changed

+118
-14
lines changed

4 files changed

+118
-14
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1313,10 +1313,23 @@ struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> {
13131313
return false;
13141314
}
13151315

1316-
// Some intrinsics may use AGPRs, but if we have a choice, we are not
1317-
// required to use AGPRs.
1318-
if (Callee->isIntrinsic())
1316+
switch (Callee->getIntrinsicID()) {
1317+
case Intrinsic::not_intrinsic:
1318+
break;
1319+
case Intrinsic::write_register:
1320+
case Intrinsic::read_register:
1321+
case Intrinsic::read_volatile_register: {
1322+
const MDString *RegName = cast<MDString>(
1323+
cast<MetadataAsValue>(CB.getArgOperand(0))->getMetadata());
1324+
auto [Kind, RegIdx, NumRegs] =
1325+
AMDGPU::parseAsmPhysRegName(RegName->getString());
1326+
return Kind != 'a';
1327+
}
1328+
default:
1329+
// Some intrinsics may use AGPRs, but if we have a choice, we are not
1330+
// required to use AGPRs.
13191331
return true;
1332+
}
13201333

13211334
// TODO: Handle callsite attributes
13221335
const auto *CalleeInfo = A.getAAFor<AAAMDGPUNoAGPR>(

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1569,12 +1569,7 @@ static bool isValidRegPrefix(char C) {
15691569
return C == 'v' || C == 's' || C == 'a';
15701570
}
15711571

1572-
std::tuple<char, unsigned, unsigned>
1573-
parseAsmConstraintPhysReg(StringRef Constraint) {
1574-
StringRef RegName = Constraint;
1575-
if (!RegName.consume_front("{") || !RegName.consume_back("}"))
1576-
return {};
1577-
1572+
std::tuple<char, unsigned, unsigned> parseAsmPhysRegName(StringRef RegName) {
15781573
char Kind = RegName.front();
15791574
if (!isValidRegPrefix(Kind))
15801575
return {};
@@ -1601,6 +1596,14 @@ parseAsmConstraintPhysReg(StringRef Constraint) {
16011596
return {};
16021597
}
16031598

1599+
std::tuple<char, unsigned, unsigned>
1600+
parseAsmConstraintPhysReg(StringRef Constraint) {
1601+
StringRef RegName = Constraint;
1602+
if (!RegName.consume_front("{") || !RegName.consume_back("}"))
1603+
return {};
1604+
return parseAsmPhysRegName(RegName);
1605+
}
1606+
16041607
std::pair<unsigned, unsigned>
16051608
getIntegerPairAttribute(const Function &F, StringRef Name,
16061609
std::pair<unsigned, unsigned> Default,

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1013,6 +1013,13 @@ bool isReadOnlySegment(const GlobalValue *GV);
10131013
/// target triple \p TT, false otherwise.
10141014
bool shouldEmitConstantsToTextSection(const Triple &TT);
10151015

1016+
/// Returns a valid charcode or 0 in the first entry if this is a valid physical
1017+
/// register name. Followed by the start register number, and the register
1018+
/// width. Does not validate that the number of registers exists in the class. Unlike
1019+
/// parseAsmConstraintPhysReg, this does not expect the name to be wrapped in
1020+
/// "{}".
1021+
std::tuple<char, unsigned, unsigned> parseAsmPhysRegName(StringRef TupleString);
1022+
10161023
/// Returns a valid charcode or 0 in the first entry if this is a valid physical
10171024
/// register constraint. Followed by the start register number, and the register
10181025
/// width. Does not validate the number of registers exists in the class.

llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll

Lines changed: 86 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ define amdgpu_kernel void @kernel_calls_extern() {
181181
define amdgpu_kernel void @kernel_calls_extern_marked_callsite() {
182182
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern_marked_callsite(
183183
; CHECK-SAME: ) #[[ATTR1]] {
184-
; CHECK-NEXT: call void @unknown() #[[ATTR5:[0-9]+]]
184+
; CHECK-NEXT: call void @unknown() #[[ATTR10:[0-9]+]]
185185
; CHECK-NEXT: call void @use_most()
186186
; CHECK-NEXT: ret void
187187
;
@@ -205,7 +205,7 @@ define amdgpu_kernel void @kernel_calls_indirect(ptr %indirect) {
205205
define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(ptr %indirect) {
206206
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(
207207
; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR1]] {
208-
; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR5]]
208+
; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR10]]
209209
; CHECK-NEXT: call void @use_most()
210210
; CHECK-NEXT: ret void
211211
;
@@ -652,12 +652,93 @@ define amdgpu_kernel void @physreg_raises_limit() {
652652
ret void
653653
}
654654

655+
define amdgpu_kernel void @kernel_uses_write_register_a55() {
656+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_a55(
657+
; CHECK-SAME: ) #[[ATTR3:[0-9]+]] {
658+
; CHECK-NEXT: call void @llvm.write_register.i32(metadata !"a55", i32 0)
659+
; CHECK-NEXT: ret void
660+
;
661+
call void @llvm.write_register.i64(metadata !"a55", i32 0)
662+
ret void
663+
}
664+
665+
define amdgpu_kernel void @kernel_uses_write_register_v55() {
666+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_v55(
667+
; CHECK-SAME: ) #[[ATTR4:[0-9]+]] {
668+
; CHECK-NEXT: call void @llvm.write_register.i32(metadata !"v55", i32 0)
669+
; CHECK-NEXT: ret void
670+
;
671+
call void @llvm.write_register.i64(metadata !"v55", i32 0)
672+
ret void
673+
}
674+
675+
define amdgpu_kernel void @kernel_uses_write_register_a55_57() {
676+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_a55_57(
677+
; CHECK-SAME: ) #[[ATTR3]] {
678+
; CHECK-NEXT: call void @llvm.write_register.i96(metadata !"a[55:57]", i96 0)
679+
; CHECK-NEXT: ret void
680+
;
681+
call void @llvm.write_register.i64(metadata !"a[55:57]", i96 0)
682+
ret void
683+
}
684+
685+
define amdgpu_kernel void @kernel_uses_read_register_a55(ptr addrspace(1) %ptr) {
686+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_register_a55(
687+
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] {
688+
; CHECK-NEXT: [[REG:%.*]] = call i32 @llvm.read_register.i32(metadata !"a55")
689+
; CHECK-NEXT: store i32 [[REG]], ptr addrspace(1) [[PTR]], align 4
690+
; CHECK-NEXT: ret void
691+
;
692+
%reg = call i32 @llvm.read_register.i64(metadata !"a55")
693+
store i32 %reg, ptr addrspace(1) %ptr
694+
ret void
695+
}
696+
697+
define amdgpu_kernel void @kernel_uses_read_volatile_register_a55(ptr addrspace(1) %ptr) {
698+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_volatile_register_a55(
699+
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] {
700+
; CHECK-NEXT: [[REG:%.*]] = call i32 @llvm.read_volatile_register.i32(metadata !"a55")
701+
; CHECK-NEXT: store i32 [[REG]], ptr addrspace(1) [[PTR]], align 4
702+
; CHECK-NEXT: ret void
703+
;
704+
%reg = call i32 @llvm.read_volatile_register.i64(metadata !"a55")
705+
store i32 %reg, ptr addrspace(1) %ptr
706+
ret void
707+
}
708+
709+
define amdgpu_kernel void @kernel_uses_read_register_a56_59(ptr addrspace(1) %ptr) {
710+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_register_a56_59(
711+
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] {
712+
; CHECK-NEXT: [[REG:%.*]] = call i128 @llvm.read_register.i128(metadata !"a[56:59]")
713+
; CHECK-NEXT: store i128 [[REG]], ptr addrspace(1) [[PTR]], align 8
714+
; CHECK-NEXT: ret void
715+
;
716+
%reg = call i128 @llvm.read_register.i64(metadata !"a[56:59]")
717+
store i128 %reg, ptr addrspace(1) %ptr
718+
ret void
719+
}
720+
721+
define amdgpu_kernel void @kernel_uses_write_register_out_of_bounds_a256() {
722+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_out_of_bounds_a256(
723+
; CHECK-SAME: ) #[[ATTR3]] {
724+
; CHECK-NEXT: call void @llvm.write_register.i32(metadata !"a256", i32 0)
725+
; CHECK-NEXT: ret void
726+
;
727+
call void @llvm.write_register.i64(metadata !"a256", i32 0)
728+
ret void
729+
}
730+
655731
attributes #0 = { "amdgpu-agpr-alloc"="0" }
656732
;.
657733
; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
658734
; CHECK: attributes #[[ATTR1]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
659735
; CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
660-
; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
661-
; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
662-
; CHECK: attributes #[[ATTR5]] = { "amdgpu-agpr-alloc"="0" }
736+
; CHECK: attributes #[[ATTR3]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
737+
; CHECK: attributes #[[ATTR4]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
738+
; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
739+
; CHECK: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
740+
; CHECK: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(read) "target-cpu"="gfx90a" }
741+
; CHECK: attributes #[[ATTR8:[0-9]+]] = { nounwind "target-cpu"="gfx90a" }
742+
; CHECK: attributes #[[ATTR9:[0-9]+]] = { nocallback nounwind "target-cpu"="gfx90a" }
743+
; CHECK: attributes #[[ATTR10]] = { "amdgpu-agpr-alloc"="0" }
663744
;.

0 commit comments

Comments
 (0)