Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion llvm/docs/AMDGPUUsage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18188,7 +18188,7 @@ terminated by an ``.end_amdhsa_kernel`` directive.
(cumode)
``.amdhsa_memory_ordered`` 1 GFX10-GFX12 Controls MEM_ORDERED in
:ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx12-table`.
``.amdhsa_forward_progress`` 0 GFX10-GFX12 Controls FWD_PROGRESS in
``.amdhsa_forward_progress`` 1 GFX10-GFX12 Controls FWD_PROGRESS in
:ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx12-table`.
``.amdhsa_shared_vgpr_count`` 0 GFX10-GFX11 Controls SHARED_VGPR_COUNT in
:ref:`amdgpu-amdhsa-compute_pgm_rsrc3-gfx10-gfx11-table`.
Expand Down
4 changes: 4 additions & 0 deletions llvm/docs/ReleaseNotes.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,10 @@ Changes to the AArch64 Backend
Changes to the AMDGPU Backend
-----------------------------

* Enabled the
[FWD_PROGRESS bit](https://llvm.org/docs/AMDGPUUsage.html#code-object-v3-kernel-descriptor)
for all GFX ISAs greater or equal to 10

Changes to the ARM Backend
--------------------------

Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1196,6 +1196,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
if (getIsaVersion(getGlobalSTI()->getCPU()).Major >= 10) {
ProgInfo.WgpMode = STM.isCuModeEnabled() ? 0 : 1;
ProgInfo.MemOrdered = 1;
ProgInfo.FwdProgress = 1;
}

// 0 = X, 1 = XY, 2 = XYZ
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,11 @@ MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI,
KD.compute_pgm_rsrc1, OneMCExpr,
amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED_SHIFT,
amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, Ctx);

MCKernelDescriptor::bits_set(
KD.compute_pgm_rsrc1, OneMCExpr,
amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS_SHIFT,
amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, Ctx);
}
if (AMDGPU::isGFX90A(*STI) && STI->getFeatureBits().test(FeatureTgSplit))
MCKernelDescriptor::bits_set(
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/AMDGPU/SIProgramInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ void SIProgramInfo::reset(const MachineFunction &MF) {
IEEEMode = 0;
WgpMode = 0;
MemOrdered = 0;
FwdProgress = 0;
RrWgMode = 0;
ScratchSize = ZeroExpr;

Expand Down Expand Up @@ -92,6 +93,10 @@ static uint64_t getComputePGMRSrc1Reg(const SIProgramInfo &ProgInfo,
if (ST.hasIEEEMode())
Reg |= S_00B848_IEEE_MODE(ProgInfo.IEEEMode);

// TODO: in the long run we will want to enable this unconditionally.
if (ST.getTargetTriple().getOS() == Triple::OSType::AMDHSA)
Reg |= S_00B848_FWD_PROGRESS(ProgInfo.FwdProgress);

if (ST.hasRrWGMode())
Reg |= S_00B848_RR_WG_MODE(ProgInfo.RrWgMode);

Expand Down
7 changes: 4 additions & 3 deletions llvm/lib/Target/AMDGPU/SIProgramInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,10 @@ struct LLVM_EXTERNAL_VISIBILITY SIProgramInfo {
uint32_t DX10Clamp = 0;
uint32_t DebugMode = 0;
uint32_t IEEEMode = 0;
uint32_t WgpMode = 0; // GFX10+
uint32_t MemOrdered = 0; // GFX10+
uint32_t RrWgMode = 0; // GFX12+
uint32_t WgpMode = 0; // GFX10+
uint32_t MemOrdered = 0; // GFX10+
uint32_t FwdProgress = 0; // GFX10+
uint32_t RrWgMode = 0; // GFX12+
const MCExpr *ScratchSize = nullptr;

// State used to calculate fields set in PGM_RSRC2 pm4 packet.
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1319,7 +1319,7 @@ void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &KernelCode,
if (Version.Major >= 10) {
KernelCode.compute_pgm_resource_registers |=
S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
S_00B848_MEM_ORDERED(1);
S_00B848_MEM_ORDERED(1) | S_00B848_FWD_PROGRESS(1);
}
}

Expand Down
6 changes: 3 additions & 3 deletions llvm/test/MC/AMDGPU/hsa-gfx12-v4.s
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0030 00000c60 80000000 00040000 00000000
// OBJDUMP-NEXT: 0030 00000ce0 80000000 00040000 00000000
// complete
// OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000
// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
Expand All @@ -39,12 +39,12 @@
// OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 00a0 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 00b0 00000060 80000000 00040000 00000000
// OBJDUMP-NEXT: 00b0 000000e0 80000000 00040000 00000000
// disabled_user_sgpr
// OBJDUMP-NEXT: 00c0 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 00d0 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 00e0 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 00f0 00000c60 80000000 00040000 00000000
// OBJDUMP-NEXT: 00f0 00000ce0 80000000 00040000 00000000

.text

Expand Down
20 changes: 10 additions & 10 deletions llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx10.s
Original file line number Diff line number Diff line change
Expand Up @@ -126,16 +126,16 @@ expr_defined:
// ASM-NEXT: .amdhsa_reserve_vcc defined_boolean
// ASM-NEXT: .amdhsa_reserve_flat_scratch defined_boolean
// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
// ASM-NEXT: .amdhsa_float_round_mode_32 (((((((((((((((((((1621884928|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&12288)>>12
// ASM-NEXT: .amdhsa_float_round_mode_16_64 (((((((((((((((((((1621884928|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&49152)>>14
// ASM-NEXT: .amdhsa_float_denorm_mode_32 (((((((((((((((((((1621884928|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&196608)>>16
// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 (((((((((((((((((((1621884928|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&786432)>>18
// ASM-NEXT: .amdhsa_dx10_clamp (((((((((((((((((((1621884928|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&2097152)>>21
// ASM-NEXT: .amdhsa_ieee_mode (((((((((((((((((((1621884928|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&8388608)>>23
// ASM-NEXT: .amdhsa_fp16_overflow (((((((((((((((((((1621884928|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&67108864)>>26
// ASM-NEXT: .amdhsa_workgroup_processor_mode (((((((((((((((((((1621884928|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&536870912)>>29
// ASM-NEXT: .amdhsa_memory_ordered (((((((((((((((((((1621884928|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&1073741824)>>30
// ASM-NEXT: .amdhsa_forward_progress (((((((((((((((((((1621884928|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&2147483648)>>31
// ASM-NEXT: .amdhsa_float_round_mode_32 (((((((((((((((((((3769368576|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&12288)>>12
// ASM-NEXT: .amdhsa_float_round_mode_16_64 (((((((((((((((((((3769368576|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&49152)>>14
// ASM-NEXT: .amdhsa_float_denorm_mode_32 (((((((((((((((((((3769368576|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&196608)>>16
// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 (((((((((((((((((((3769368576|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&786432)>>18
// ASM-NEXT: .amdhsa_dx10_clamp (((((((((((((((((((3769368576|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&2097152)>>21
// ASM-NEXT: .amdhsa_ieee_mode (((((((((((((((((((3769368576|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&8388608)>>23
// ASM-NEXT: .amdhsa_fp16_overflow (((((((((((((((((((3769368576|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&67108864)>>26
// ASM-NEXT: .amdhsa_workgroup_processor_mode (((((((((((((((((((3769368576|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&536870912)>>29
// ASM-NEXT: .amdhsa_memory_ordered (((((((((((((((((((3769368576|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&1073741824)>>30
// ASM-NEXT: .amdhsa_forward_progress (((((((((((((((((((3769368576|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&2147483648)>>31
// ASM-NEXT: .amdhsa_shared_vgpr_count 0
// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&16777216)>>24
// ASM-NEXT: .amdhsa_exception_fp_denorm_src (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&33554432)>>25
Expand Down
Loading