Skip to content

Commit e21a9cd

Browse files
arsenm and github-actions[bot]
authored and committed
Automerge: AMDGPU: Account for read/write register intrinsics for AGPR usage (#161988)
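Fix the handling of the special-case intrinsics (llvm.read_register, llvm.read_volatile_register, and llvm.write_register) that can directly reference a physical register, so that naming an AGPR is counted as AGPR usage. There's no reason to use this.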
2 parents f2f193b + cb53a2d commit e21a9cd

File tree

4 files changed

+118
-14
lines changed

4 files changed

+118
-14
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1328,10 +1328,23 @@ struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> {
13281328
return false;
13291329
}
13301330

1331-
// Some intrinsics may use AGPRs, but if we have a choice, we are not
1332-
// required to use AGPRs.
1333-
if (Callee->isIntrinsic())
1331+
switch (Callee->getIntrinsicID()) {
1332+
case Intrinsic::not_intrinsic:
1333+
break;
1334+
case Intrinsic::write_register:
1335+
case Intrinsic::read_register:
1336+
case Intrinsic::read_volatile_register: {
1337+
const MDString *RegName = cast<MDString>(
1338+
cast<MetadataAsValue>(CB.getArgOperand(0))->getMetadata());
1339+
auto [Kind, RegIdx, NumRegs] =
1340+
AMDGPU::parseAsmPhysRegName(RegName->getString());
1341+
return Kind != 'a';
1342+
}
1343+
default:
1344+
// Some intrinsics may use AGPRs, but if we have a choice, we are not
1345+
// required to use AGPRs.
13341346
return true;
1347+
}
13351348

13361349
// TODO: Handle callsite attributes
13371350
const auto *CalleeInfo = A.getAAFor<AAAMDGPUNoAGPR>(

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1569,12 +1569,7 @@ static bool isValidRegPrefix(char C) {
15691569
return C == 'v' || C == 's' || C == 'a';
15701570
}
15711571

1572-
std::tuple<char, unsigned, unsigned>
1573-
parseAsmConstraintPhysReg(StringRef Constraint) {
1574-
StringRef RegName = Constraint;
1575-
if (!RegName.consume_front("{") || !RegName.consume_back("}"))
1576-
return {};
1577-
1572+
std::tuple<char, unsigned, unsigned> parseAsmPhysRegName(StringRef RegName) {
15781573
char Kind = RegName.front();
15791574
if (!isValidRegPrefix(Kind))
15801575
return {};
@@ -1601,6 +1596,14 @@ parseAsmConstraintPhysReg(StringRef Constraint) {
16011596
return {};
16021597
}
16031598

1599+
std::tuple<char, unsigned, unsigned>
1600+
parseAsmConstraintPhysReg(StringRef Constraint) {
1601+
StringRef RegName = Constraint;
1602+
if (!RegName.consume_front("{") || !RegName.consume_back("}"))
1603+
return {};
1604+
return parseAsmPhysRegName(RegName);
1605+
}
1606+
16041607
std::pair<unsigned, unsigned>
16051608
getIntegerPairAttribute(const Function &F, StringRef Name,
16061609
std::pair<unsigned, unsigned> Default,

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1013,6 +1013,13 @@ bool isReadOnlySegment(const GlobalValue *GV);
10131013
/// target triple \p TT, false otherwise.
10141014
bool shouldEmitConstantsToTextSection(const Triple &TT);
10151015

1016+
/// Returns a valid charcode or 0 in the first entry if this is a valid physical
1017+
/// register name. Followed by the start register number, and the register
1018+
/// width. Does not validate the number of registers exists in the class. Unlike
1019+
/// parseAsmConstraintPhysReg, this does not expect the name to be wrapped in
1020+
/// "{}".
1021+
std::tuple<char, unsigned, unsigned> parseAsmPhysRegName(StringRef TupleString);
1022+
10161023
/// Returns a valid charcode or 0 in the first entry if this is a valid physical
10171024
/// register constraint. Followed by the start register number, and the register
10181025
/// width. Does not validate the number of registers exists in the class.

llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll

Lines changed: 86 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ define amdgpu_kernel void @kernel_calls_extern() {
181181
define amdgpu_kernel void @kernel_calls_extern_marked_callsite() {
182182
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern_marked_callsite(
183183
; CHECK-SAME: ) #[[ATTR1]] {
184-
; CHECK-NEXT: call void @unknown() #[[ATTR5:[0-9]+]]
184+
; CHECK-NEXT: call void @unknown() #[[ATTR10:[0-9]+]]
185185
; CHECK-NEXT: call void @use_most()
186186
; CHECK-NEXT: ret void
187187
;
@@ -205,7 +205,7 @@ define amdgpu_kernel void @kernel_calls_indirect(ptr %indirect) {
205205
define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(ptr %indirect) {
206206
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(
207207
; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR1]] {
208-
; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR5]]
208+
; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR10]]
209209
; CHECK-NEXT: call void @use_most()
210210
; CHECK-NEXT: ret void
211211
;
@@ -701,12 +701,93 @@ define amdgpu_kernel void @align2_align4_virtreg() {
701701
ret void
702702
}
703703

704+
define amdgpu_kernel void @kernel_uses_write_register_a55() {
705+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_a55(
706+
; CHECK-SAME: ) #[[ATTR3:[0-9]+]] {
707+
; CHECK-NEXT: call void @llvm.write_register.i32(metadata !"a55", i32 0)
708+
; CHECK-NEXT: ret void
709+
;
710+
call void @llvm.write_register.i64(metadata !"a55", i32 0)
711+
ret void
712+
}
713+
714+
define amdgpu_kernel void @kernel_uses_write_register_v55() {
715+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_v55(
716+
; CHECK-SAME: ) #[[ATTR4:[0-9]+]] {
717+
; CHECK-NEXT: call void @llvm.write_register.i32(metadata !"v55", i32 0)
718+
; CHECK-NEXT: ret void
719+
;
720+
call void @llvm.write_register.i64(metadata !"v55", i32 0)
721+
ret void
722+
}
723+
724+
define amdgpu_kernel void @kernel_uses_write_register_a55_57() {
725+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_a55_57(
726+
; CHECK-SAME: ) #[[ATTR3]] {
727+
; CHECK-NEXT: call void @llvm.write_register.i96(metadata !"a[55:57]", i96 0)
728+
; CHECK-NEXT: ret void
729+
;
730+
call void @llvm.write_register.i64(metadata !"a[55:57]", i96 0)
731+
ret void
732+
}
733+
734+
define amdgpu_kernel void @kernel_uses_read_register_a55(ptr addrspace(1) %ptr) {
735+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_register_a55(
736+
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] {
737+
; CHECK-NEXT: [[REG:%.*]] = call i32 @llvm.read_register.i32(metadata !"a55")
738+
; CHECK-NEXT: store i32 [[REG]], ptr addrspace(1) [[PTR]], align 4
739+
; CHECK-NEXT: ret void
740+
;
741+
%reg = call i32 @llvm.read_register.i64(metadata !"a55")
742+
store i32 %reg, ptr addrspace(1) %ptr
743+
ret void
744+
}
745+
746+
define amdgpu_kernel void @kernel_uses_read_volatile_register_a55(ptr addrspace(1) %ptr) {
747+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_volatile_register_a55(
748+
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] {
749+
; CHECK-NEXT: [[REG:%.*]] = call i32 @llvm.read_volatile_register.i32(metadata !"a55")
750+
; CHECK-NEXT: store i32 [[REG]], ptr addrspace(1) [[PTR]], align 4
751+
; CHECK-NEXT: ret void
752+
;
753+
%reg = call i32 @llvm.read_volatile_register.i64(metadata !"a55")
754+
store i32 %reg, ptr addrspace(1) %ptr
755+
ret void
756+
}
757+
758+
define amdgpu_kernel void @kernel_uses_read_register_a56_59(ptr addrspace(1) %ptr) {
759+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_register_a56_59(
760+
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] {
761+
; CHECK-NEXT: [[REG:%.*]] = call i128 @llvm.read_register.i128(metadata !"a[56:59]")
762+
; CHECK-NEXT: store i128 [[REG]], ptr addrspace(1) [[PTR]], align 8
763+
; CHECK-NEXT: ret void
764+
;
765+
%reg = call i128 @llvm.read_register.i64(metadata !"a[56:59]")
766+
store i128 %reg, ptr addrspace(1) %ptr
767+
ret void
768+
}
769+
770+
define amdgpu_kernel void @kernel_uses_write_register_out_of_bounds_a256() {
771+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_out_of_bounds_a256(
772+
; CHECK-SAME: ) #[[ATTR3]] {
773+
; CHECK-NEXT: call void @llvm.write_register.i32(metadata !"a256", i32 0)
774+
; CHECK-NEXT: ret void
775+
;
776+
call void @llvm.write_register.i64(metadata !"a256", i32 0)
777+
ret void
778+
}
779+
704780
attributes #0 = { "amdgpu-agpr-alloc"="0" }
705781
;.
706782
; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
707783
; CHECK: attributes #[[ATTR1]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
708784
; CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
709-
; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
710-
; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
711-
; CHECK: attributes #[[ATTR5]] = { "amdgpu-agpr-alloc"="0" }
785+
; CHECK: attributes #[[ATTR3]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
786+
; CHECK: attributes #[[ATTR4]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
787+
; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
788+
; CHECK: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
789+
; CHECK: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(read) "target-cpu"="gfx90a" }
790+
; CHECK: attributes #[[ATTR8:[0-9]+]] = { nounwind "target-cpu"="gfx90a" }
791+
; CHECK: attributes #[[ATTR9:[0-9]+]] = { nocallback nounwind "target-cpu"="gfx90a" }
792+
; CHECK: attributes #[[ATTR10]] = { "amdgpu-agpr-alloc"="0" }
712793
;.

0 commit comments

Comments
 (0)