Skip to content

Commit f238c3f

Browse files
committed
Add suggested formatting changes, factor out common parts of emitKenrelArg. Update test.
1 parent 1660589 commit f238c3f

File tree

5 files changed

+102
-77
lines changed

5 files changed

+102
-77
lines changed

llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -187,25 +187,25 @@ AMDGPUFunctionArgInfo::getPreloadDescriptorsForArgIdx(unsigned ArgIdx) const {
187187
Results.push_back(&KV.second);
188188
}
189189

190-
llvm::stable_sort(Results, [](const KernArgPreloadDescriptor *A,
191-
const KernArgPreloadDescriptor *B) {
190+
stable_sort(Results, [](const KernArgPreloadDescriptor *A,
191+
const KernArgPreloadDescriptor *B) {
192192
return A->PartIdx < B->PartIdx;
193193
});
194194

195195
return Results;
196196
}
197197

198-
std::optional<const KernArgPreloadDescriptor *>
198+
const KernArgPreloadDescriptor *
199199
AMDGPUFunctionArgInfo::getHiddenArgPreloadDescriptor(HiddenArg HA) const {
200200
assert(HA < END_HIDDEN_ARGS);
201201

202202
auto HiddenArgIt = PreloadHiddenArgsIndexMap.find(HA);
203203
if (HiddenArgIt == PreloadHiddenArgsIndexMap.end())
204-
return std::nullopt;
204+
return nullptr;
205205

206206
auto KernArgIt = PreloadKernArgs.find(HiddenArgIt->second);
207207
if (KernArgIt == PreloadKernArgs.end())
208-
return std::nullopt;
208+
return nullptr;
209209

210210
return &KernArgIt->second;
211211
}

llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -133,25 +133,26 @@ struct HiddenArgUtils {
133133
{22, 2, "_hidden_remainder_z"}};
134134

135135
static HiddenArg getHiddenArgFromOffset(unsigned Offset) {
136-
for (unsigned I = 0; I < END_HIDDEN_ARGS; ++I)
136+
for (unsigned I = 0; I < END_HIDDEN_ARGS; ++I) {
137137
if (HiddenArgs[I].Offset == Offset)
138138
return static_cast<HiddenArg>(I);
139+
}
139140

140141
return END_HIDDEN_ARGS;
141142
}
142143

143144
static Type *getHiddenArgType(LLVMContext &Ctx, HiddenArg HA) {
144145
if (HA < END_HIDDEN_ARGS)
145-
return static_cast<Type *>(Type::getIntNTy(Ctx, HiddenArgs[HA].Size * 8));
146+
return Type::getIntNTy(Ctx, HiddenArgs[HA].Size * 8);
146147

147-
llvm_unreachable("Unexpected hidden argument.");
148+
llvm_unreachable("unexpected hidden argument");
148149
}
149150

150151
static const char *getHiddenArgName(HiddenArg HA) {
151-
if (HA < END_HIDDEN_ARGS) {
152+
if (HA < END_HIDDEN_ARGS)
152153
return HiddenArgs[HA].Name;
153-
}
154-
llvm_unreachable("Unexpected hidden argument.");
154+
155+
llvm_unreachable("unexpected hidden argument");
155156
}
156157
};
157158

@@ -250,7 +251,7 @@ struct AMDGPUFunctionArgInfo {
250251
getPreloadDescriptorsForArgIdx(unsigned ArgIdx) const;
251252

252253
// Returns the hidden arguments `KernArgPreloadDescriptor` if it is preloaded.
253-
std::optional<const KernArgPreload::KernArgPreloadDescriptor *>
254+
const KernArgPreload::KernArgPreloadDescriptor *
254255
getHiddenArgPreloadDescriptor(KernArgPreload::HiddenArg HA) const;
255256
};
256257

llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp

Lines changed: 11 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,12 @@ void MetadataStreamerMsgPackV4::emitKernelArg(const Argument &Arg,
303303
unsigned &Offset,
304304
msgpack::ArrayDocNode Args,
305305
const MachineFunction &MF) {
306+
emitKernelArgCommon(Arg, Offset, Args, MF);
307+
}
308+
309+
void MetadataStreamerMsgPackV4::emitKernelArgCommon(
310+
const Argument &Arg, unsigned &Offset, msgpack::ArrayDocNode Args,
311+
const MachineFunction &MF, StringRef PreloadRegisters) {
306312
const auto *Func = Arg.getParent();
307313
auto ArgNo = Arg.getArgNo();
308314
const MDNode *Node;
@@ -361,7 +367,7 @@ void MetadataStreamerMsgPackV4::emitKernelArg(const Argument &Arg,
361367

362368
emitKernelArgImpl(DL, ArgTy, ArgAlign,
363369
getValueKind(ArgTy, TypeQual, BaseTypeName), Offset, Args,
364-
"" /* PreloadRegisters */, PointeeAlign, Name, TypeName,
370+
PreloadRegisters, PointeeAlign, Name, TypeName,
365371
BaseTypeName, ActAccQual, AccQual, TypeQual);
366372
}
367373

@@ -768,9 +774,9 @@ void MetadataStreamerMsgPackV6::emitHiddenKernelArgWithPreload(
768774
msgpack::ArrayDocNode Args, const AMDGPUFunctionArgInfo &ArgInfo) {
769775

770776
SmallString<16> PreloadStr;
771-
auto PreloadDesc = ArgInfo.getHiddenArgPreloadDescriptor(HiddenArg);
777+
const auto *PreloadDesc = ArgInfo.getHiddenArgPreloadDescriptor(HiddenArg);
772778
if (PreloadDesc) {
773-
const auto &Regs = (*PreloadDesc)->Regs;
779+
const auto &Regs = PreloadDesc->Regs;
774780
for (unsigned I = 0; I < Regs.size(); ++I) {
775781
if (I > 0)
776782
PreloadStr += " ";
@@ -918,63 +924,12 @@ void MetadataStreamerMsgPackV6::emitKernelArg(const Argument &Arg,
918924
unsigned &Offset,
919925
msgpack::ArrayDocNode Args,
920926
const MachineFunction &MF) {
921-
const auto *Func = Arg.getParent();
922-
auto ArgNo = Arg.getArgNo();
923-
const MDNode *Node;
924-
925-
StringRef Name;
926-
Node = Func->getMetadata("kernel_arg_name");
927-
if (Node && ArgNo < Node->getNumOperands())
928-
Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
929-
else if (Arg.hasName())
930-
Name = Arg.getName();
931-
932-
StringRef TypeName;
933-
Node = Func->getMetadata("kernel_arg_type");
934-
if (Node && ArgNo < Node->getNumOperands())
935-
TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
936-
937-
StringRef BaseTypeName;
938-
Node = Func->getMetadata("kernel_arg_base_type");
939-
if (Node && ArgNo < Node->getNumOperands())
940-
BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
941-
942-
StringRef ActAccQual;
943-
// Do we really need NoAlias check here?
944-
if (Arg.getType()->isPointerTy() && Arg.hasNoAliasAttr()) {
945-
if (Arg.onlyReadsMemory())
946-
ActAccQual = "read_only";
947-
else if (Arg.hasAttribute(Attribute::WriteOnly))
948-
ActAccQual = "write_only";
949-
}
950-
951-
StringRef AccQual;
952-
Node = Func->getMetadata("kernel_arg_access_qual");
953-
if (Node && ArgNo < Node->getNumOperands())
954-
AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
955-
956-
StringRef TypeQual;
957-
Node = Func->getMetadata("kernel_arg_type_qual");
958-
if (Node && ArgNo < Node->getNumOperands())
959-
TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
960-
961-
const DataLayout &DL = Func->getDataLayout();
962-
963-
MaybeAlign PointeeAlign;
964-
Type *Ty = Arg.hasByRefAttr() ? Arg.getParamByRefType() : Arg.getType();
965-
966-
// FIXME: Need to distinguish in memory alignment from pointer alignment.
967-
if (auto *PtrTy = dyn_cast<PointerType>(Ty)) {
968-
if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS)
969-
PointeeAlign = Arg.getParamAlign().valueOrOne();
970-
}
971-
972927
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
973928
SmallString<8> PreloadRegisters;
974929
if (MFI->getNumKernargPreloadedSGPRs()) {
975930
assert(MF.getSubtarget<GCNSubtarget>().hasKernargPreload());
976931
const auto &PreloadDescs =
977-
MFI->getArgInfo().getPreloadDescriptorsForArgIdx(ArgNo);
932+
MFI->getArgInfo().getPreloadDescriptorsForArgIdx(Arg.getArgNo());
978933
for (auto &Desc : PreloadDescs) {
979934
if (!PreloadRegisters.empty())
980935
PreloadRegisters += " ";
@@ -987,15 +942,7 @@ void MetadataStreamerMsgPackV6::emitKernelArg(const Argument &Arg,
987942
}
988943
}
989944

990-
// There's no distinction between byval aggregates and raw aggregates.
991-
Type *ArgTy;
992-
Align ArgAlign;
993-
std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL);
994-
995-
emitKernelArgImpl(DL, ArgTy, ArgAlign,
996-
getValueKind(ArgTy, TypeQual, BaseTypeName), Offset, Args,
997-
PreloadRegisters, PointeeAlign, Name, TypeName,
998-
BaseTypeName, ActAccQual, AccQual, TypeQual);
945+
emitKernelArgCommon(Arg, Offset, Args, MF, PreloadRegisters);
999946
}
1000947

1001948
} // end namespace AMDGPU::HSAMD

llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,11 @@ class LLVM_EXTERNAL_VISIBILITY MetadataStreamerMsgPackV4
115115
msgpack::ArrayDocNode Args,
116116
const MachineFunction &MF) override;
117117

118+
void emitKernelArgCommon(const Argument &Arg, unsigned &Offset,
119+
msgpack::ArrayDocNode Args,
120+
const MachineFunction &MF,
121+
StringRef PreloadRegisters = {});
122+
118123
void emitKernelArgImpl(const DataLayout &DL, Type *Ty, Align Alignment,
119124
StringRef ValueKind, unsigned &Offset,
120125
msgpack::ArrayDocNode Args,

llvm/test/CodeGen/AMDGPU/hsa-metadata-preload-args-v6.ll

Lines changed: 73 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,74 @@
314314
; CHECK-NEXT: .vgpr_count: 2
315315
; CHECK-NEXT: .vgpr_spill_count: 0
316316
; CHECK-NEXT: .wavefront_size: 64
317+
; CHECK-NEXT: - .agpr_count: 0
318+
; CHECK-NEXT: .args:
319+
; CHECK-NEXT: - .address_space: global
320+
; CHECK-NEXT: .name: out
321+
; CHECK-NEXT: .offset: 0
322+
; CHECK-NEXT: .preload_registers: 's[2:3]'
323+
; CHECK-NEXT: .size: 8
324+
; CHECK-NEXT: .value_kind: global_buffer
325+
; CHECK-NEXT: - .name: arg0
326+
; CHECK-NEXT: .offset: 16
327+
; CHECK-NEXT: .preload_registers: s6 s7 s8 s9
328+
; CHECK-NEXT: .size: 16
329+
; CHECK-NEXT: .value_kind: by_value
330+
; CHECK-NEXT: - .offset: 32
331+
; CHECK-NEXT: .size: 4
332+
; CHECK-NEXT: .value_kind: hidden_block_count_x
333+
; CHECK-NEXT: - .offset: 36
334+
; CHECK-NEXT: .size: 4
335+
; CHECK-NEXT: .value_kind: hidden_block_count_y
336+
; CHECK-NEXT: - .offset: 40
337+
; CHECK-NEXT: .size: 4
338+
; CHECK-NEXT: .value_kind: hidden_block_count_z
339+
; CHECK-NEXT: - .offset: 44
340+
; CHECK-NEXT: .size: 2
341+
; CHECK-NEXT: .value_kind: hidden_group_size_x
342+
; CHECK-NEXT: - .offset: 46
343+
; CHECK-NEXT: .size: 2
344+
; CHECK-NEXT: .value_kind: hidden_group_size_y
345+
; CHECK-NEXT: - .offset: 48
346+
; CHECK-NEXT: .size: 2
347+
; CHECK-NEXT: .value_kind: hidden_group_size_z
348+
; CHECK-NEXT: - .offset: 50
349+
; CHECK-NEXT: .size: 2
350+
; CHECK-NEXT: .value_kind: hidden_remainder_x
351+
; CHECK-NEXT: - .offset: 52
352+
; CHECK-NEXT: .size: 2
353+
; CHECK-NEXT: .value_kind: hidden_remainder_y
354+
; CHECK-NEXT: - .offset: 54
355+
; CHECK-NEXT: .size: 2
356+
; CHECK-NEXT: .value_kind: hidden_remainder_z
357+
; CHECK-NEXT: - .offset: 72
358+
; CHECK-NEXT: .size: 8
359+
; CHECK-NEXT: .value_kind: hidden_global_offset_x
360+
; CHECK-NEXT: - .offset: 80
361+
; CHECK-NEXT: .size: 8
362+
; CHECK-NEXT: .value_kind: hidden_global_offset_y
363+
; CHECK-NEXT: - .offset: 88
364+
; CHECK-NEXT: .size: 8
365+
; CHECK-NEXT: .value_kind: hidden_global_offset_z
366+
; CHECK-NEXT: - .offset: 96
367+
; CHECK-NEXT: .size: 2
368+
; CHECK-NEXT: .value_kind: hidden_grid_dims
369+
; CHECK-NEXT: - .offset: 104
370+
; CHECK-NEXT: .size: 8
371+
; CHECK-NEXT: .value_kind: hidden_printf_buffer
372+
; CHECK-NEXT: .group_segment_fixed_size: 0
373+
; CHECK-NEXT: .kernarg_segment_align: 16
374+
; CHECK-NEXT: .kernarg_segment_size: 288
375+
; CHECK-NEXT: .max_flat_workgroup_size: 1024
376+
; CHECK-NEXT: .name: test_prelaod_v6_ptr1_v8i16
377+
; CHECK-NEXT: .private_segment_fixed_size: 0
378+
; CHECK-NEXT: .sgpr_count: 16
379+
; CHECK-NEXT: .sgpr_spill_count: 0
380+
; CHECK-NEXT: .symbol: test_prelaod_v6_ptr1_v8i16.kd
381+
; CHECK-NEXT: .uses_dynamic_stack: false
382+
; CHECK-NEXT: .vgpr_count: 5
383+
; CHECK-NEXT: .vgpr_spill_count: 0
384+
; CHECK-NEXT: .wavefront_size: 64
317385
; CHECK-NEXT: amdhsa.printf:
318386
; CHECK-NEXT: - '1:1:4:%d\n'
319387
; CHECK-NEXT: - '2:1:8:%g\n'
@@ -377,6 +445,10 @@ define amdgpu_kernel void @test_prelaod_v6_ptr1_i16_i16(ptr addrspace(1) inreg %
377445
ret void
378446
}
379447

448+
define amdgpu_kernel void @test_prelaod_v6_ptr1_v8i16(ptr addrspace(1) inreg %out, <8 x i16> inreg %arg0) #1 {
449+
store <8 x i16> %arg0, ptr addrspace(1) %out, align 4
450+
ret void
451+
}
380452

381453
!llvm.module.flags = !{!0}
382454
!0 = !{i32 1, !"amdhsa_code_object_version", i32 600}
@@ -385,4 +457,4 @@ define amdgpu_kernel void @test_prelaod_v6_ptr1_i16_i16(ptr addrspace(1) inreg %
385457
!2 = !{!"2:1:8:%g\5Cn"}
386458

387459
attributes #0 = { optnone noinline }
388-
attributes #1 = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
460+
attributes #1 = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }

0 commit comments

Comments
 (0)