Skip to content

Commit 1c49ce6

Browse files
authored
[AMDGPU] Enable FWD_PROGRESS bit for GFX10+ on PAL (#139895)
Performance testing shows no significant gains or losses on graphics workloads, so this is mostly to make the behavior consistent across all supported OSes instead of special-casing HSA.
1 parent 28b8550 commit 1c49ce6

File tree

9 files changed, with 24 additions and 12 deletions.

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1415,6 +1415,7 @@ static void EmitPALMetadataCommon(AMDGPUPALMetadata *MD,
14151415

14161416
MD->setHwStage(CC, ".wgp_mode", (bool)CurrentProgramInfo.WgpMode);
14171417
MD->setHwStage(CC, ".mem_ordered", (bool)CurrentProgramInfo.MemOrdered);
1418+
MD->setHwStage(CC, ".forward_progress", (bool)CurrentProgramInfo.FwdProgress);
14181419

14191420
if (AMDGPU::isCompute(CC)) {
14201421
MD->setHwStage(CC, ".trap_present",

llvm/lib/Target/AMDGPU/SIProgramInfo.cpp

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -85,18 +85,15 @@ static uint64_t getComputePGMRSrc1Reg(const SIProgramInfo &ProgInfo,
8585
S_00B848_PRIV(ProgInfo.Priv) |
8686
S_00B848_DEBUG_MODE(ProgInfo.DebugMode) |
8787
S_00B848_WGP_MODE(ProgInfo.WgpMode) |
88-
S_00B848_MEM_ORDERED(ProgInfo.MemOrdered);
88+
S_00B848_MEM_ORDERED(ProgInfo.MemOrdered) |
89+
S_00B848_FWD_PROGRESS(ProgInfo.FwdProgress);
8990

9091
if (ST.hasDX10ClampMode())
9192
Reg |= S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp);
9293

9394
if (ST.hasIEEEMode())
9495
Reg |= S_00B848_IEEE_MODE(ProgInfo.IEEEMode);
9596

96-
// TODO: in the long run we will want to enable this unconditionally.
97-
if (ST.getTargetTriple().getOS() == Triple::OSType::AMDHSA)
98-
Reg |= S_00B848_FWD_PROGRESS(ProgInfo.FwdProgress);
99-
10097
if (ST.hasRrWGMode())
10198
Reg |= S_00B848_RR_WG_MODE(ProgInfo.RrWgMode);
10299

llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -3211,7 +3211,7 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel
32113211
; GFX10-NEXT: enable_ieee_mode = 1
32123212
; GFX10-NEXT: enable_wgp_mode = 1
32133213
; GFX10-NEXT: enable_mem_ordered = 1
3214-
; GFX10-NEXT: enable_fwd_progress = 0
3214+
; GFX10-NEXT: enable_fwd_progress = 1
32153215
; GFX10-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
32163216
; GFX10-NEXT: user_sgpr_count = 14
32173217
; GFX10-NEXT: enable_trap_handler = 0
@@ -3303,7 +3303,7 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel
33033303
; GFX11-NEXT: enable_ieee_mode = 1
33043304
; GFX11-NEXT: enable_wgp_mode = 1
33053305
; GFX11-NEXT: enable_mem_ordered = 1
3306-
; GFX11-NEXT: enable_fwd_progress = 0
3306+
; GFX11-NEXT: enable_fwd_progress = 1
33073307
; GFX11-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
33083308
; GFX11-NEXT: user_sgpr_count = 13
33093309
; GFX11-NEXT: enable_trap_handler = 0
@@ -4215,7 +4215,7 @@ define amdgpu_kernel void @dyn_extract_v4f32_s_s_s(ptr addrspace(1) %out, i32 %s
42154215
; GFX10-NEXT: enable_ieee_mode = 1
42164216
; GFX10-NEXT: enable_wgp_mode = 1
42174217
; GFX10-NEXT: enable_mem_ordered = 1
4218-
; GFX10-NEXT: enable_fwd_progress = 0
4218+
; GFX10-NEXT: enable_fwd_progress = 1
42194219
; GFX10-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
42204220
; GFX10-NEXT: user_sgpr_count = 14
42214221
; GFX10-NEXT: enable_trap_handler = 0
@@ -4300,7 +4300,7 @@ define amdgpu_kernel void @dyn_extract_v4f32_s_s_s(ptr addrspace(1) %out, i32 %s
43004300
; GFX11-NEXT: enable_ieee_mode = 1
43014301
; GFX11-NEXT: enable_wgp_mode = 1
43024302
; GFX11-NEXT: enable_mem_ordered = 1
4303-
; GFX11-NEXT: enable_fwd_progress = 0
4303+
; GFX11-NEXT: enable_fwd_progress = 1
43044304
; GFX11-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
43054305
; GFX11-NEXT: user_sgpr_count = 13
43064306
; GFX11-NEXT: enable_trap_handler = 0
@@ -4569,7 +4569,7 @@ define amdgpu_kernel void @dyn_extract_v4f64_s_s_s(ptr addrspace(1) %out, i32 %s
45694569
; GFX10-NEXT: enable_ieee_mode = 1
45704570
; GFX10-NEXT: enable_wgp_mode = 1
45714571
; GFX10-NEXT: enable_mem_ordered = 1
4572-
; GFX10-NEXT: enable_fwd_progress = 0
4572+
; GFX10-NEXT: enable_fwd_progress = 1
45734573
; GFX10-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
45744574
; GFX10-NEXT: user_sgpr_count = 14
45754575
; GFX10-NEXT: enable_trap_handler = 0
@@ -4657,7 +4657,7 @@ define amdgpu_kernel void @dyn_extract_v4f64_s_s_s(ptr addrspace(1) %out, i32 %s
46574657
; GFX11-NEXT: enable_ieee_mode = 1
46584658
; GFX11-NEXT: enable_wgp_mode = 1
46594659
; GFX11-NEXT: enable_mem_ordered = 1
4660-
; GFX11-NEXT: enable_fwd_progress = 0
4660+
; GFX11-NEXT: enable_fwd_progress = 1
46614661
; GFX11-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
46624662
; GFX11-NEXT: user_sgpr_count = 13
46634663
; GFX11-NEXT: enable_trap_handler = 0

llvm/test/CodeGen/AMDGPU/amdpal-msgpack-ieee.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
; SI-DAG: '0x2e12 (COMPUTE_PGM_RSRC1)': 0xaf0000{{$}}
88
; VI-DAG: '0x2e12 (COMPUTE_PGM_RSRC1)': 0xaf02c0{{$}}
99
; GFX9-DAG: '0x2e12 (COMPUTE_PGM_RSRC1)': 0xaf0000{{$}}
10-
; GFX12-DAG: '0x2e12 (COMPUTE_PGM_RSRC1)': 0x600f0000{{$}}
10+
; GFX12-DAG: '0x2e12 (COMPUTE_PGM_RSRC1)': 0xe00f0000{{$}}
1111
define amdgpu_cs half @cs_amdpal(half %arg0) #0 {
1212
%add = fadd half %arg0, 1.0
1313
ret half %add

llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable-dvgpr.ll

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717
; CHECK-NEXT: .debug_mode: 0
1818
; CHECK-NEXT: .excp_en: 0
1919
; CHECK-NEXT: .float_mode: 0xc0
20+
; CHECK-NEXT: .forward_progress: true
2021
; CHECK-NEXT: .image_op: false
2122
; CHECK-NEXT: .lds_size: 0x200
2223
; CHECK-NEXT: .mem_ordered: true

llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable.ll

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@
1919
; CHECK-NEXT: .debug_mode: 0
2020
; CHECK-NEXT: .excp_en: 0
2121
; CHECK-NEXT: .float_mode: 0xc0
22+
; CHECK-NEXT: .forward_progress: true
2223
; GFX11-NEXT: .ieee_mode: true
2324
; CHECK-NEXT: .image_op: false
2425
; CHECK-NEXT: .lds_size: 0x200

llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-dvgpr.ll

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -59,6 +59,7 @@
5959
; CHECK-NEXT: .entry_point_symbol: _amdgpu_cs_main
6060
; CHECK-NEXT: .excp_en: 0
6161
; CHECK-NEXT: .float_mode: 0xc0
62+
; CHECK-NEXT: .forward_progress: true
6263
; CHECK-NEXT: .image_op: false
6364
; CHECK-NEXT: .lds_size: 0
6465
; CHECK-NEXT: .mem_ordered: true
@@ -113,6 +114,7 @@
113114
; CHECK-NEXT: .debug_mode: false
114115
; CHECK-NEXT: .entry_point: _amdgpu_gs
115116
; CHECK-NEXT: .entry_point_symbol: gs_shader
117+
; CHECK-NEXT: .forward_progress: true
116118
; CHECK-NEXT: .lds_size: 0x200
117119
; CHECK-NEXT: .mem_ordered: true
118120
; CHECK-NEXT: .scratch_en: false
@@ -124,6 +126,7 @@
124126
; CHECK-NEXT: .debug_mode: false
125127
; CHECK-NEXT: .entry_point: _amdgpu_hs
126128
; CHECK-NEXT: .entry_point_symbol: hs_shader
129+
; CHECK-NEXT: .forward_progress: true
127130
; CHECK-NEXT: .lds_size: 0x1000
128131
; CHECK-NEXT: .mem_ordered: true
129132
; CHECK-NEXT: .scratch_en: false
@@ -135,6 +138,7 @@
135138
; CHECK-NEXT: .debug_mode: false
136139
; CHECK-NEXT: .entry_point: _amdgpu_ps
137140
; CHECK-NEXT: .entry_point_symbol: ps_shader
141+
; CHECK-NEXT: .forward_progress: true
138142
; CHECK-NEXT: .lds_size: 0
139143
; CHECK-NEXT: .mem_ordered: true
140144
; CHECK-NEXT: .scratch_en: false

llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -62,6 +62,7 @@
6262
; CHECK-NEXT: .entry_point_symbol: _amdgpu_cs_main
6363
; CHECK-NEXT: .excp_en: 0
6464
; CHECK-NEXT: .float_mode: 0xc0
65+
; CHECK-NEXT: .forward_progress: true
6566
; GFX11-NEXT: .ieee_mode: false
6667
; CHECK-NEXT: .image_op: false
6768
; CHECK-NEXT: .lds_size: 0
@@ -118,6 +119,7 @@
118119
; CHECK-NEXT: .debug_mode: false
119120
; CHECK-NEXT: .entry_point: _amdgpu_gs_main
120121
; CHECK-NEXT: .entry_point_symbol: gs_shader
122+
; CHECK-NEXT: .forward_progress: true
121123
; GFX11-NEXT: .ieee_mode: false
122124
; CHECK-NEXT: .lds_size: 0x200
123125
; CHECK-NEXT: .mem_ordered: true
@@ -130,6 +132,7 @@
130132
; CHECK-NEXT: .debug_mode: false
131133
; CHECK-NEXT: .entry_point: _amdgpu_hs_main
132134
; CHECK-NEXT: .entry_point_symbol: hs_shader
135+
; CHECK-NEXT: .forward_progress: true
133136
; GFX11-NEXT: .ieee_mode: false
134137
; CHECK-NEXT: .lds_size: 0x1000
135138
; CHECK-NEXT: .mem_ordered: true
@@ -142,6 +145,7 @@
142145
; CHECK-NEXT: .debug_mode: false
143146
; CHECK-NEXT: .entry_point: _amdgpu_ps_main
144147
; CHECK-NEXT: .entry_point_symbol: ps_shader
148+
; CHECK-NEXT: .forward_progress: true
145149
; GFX11-NEXT: .ieee_mode: false
146150
; CHECK-NEXT: .lds_size: 0
147151
; CHECK-NEXT: .mem_ordered: true

llvm/test/CodeGen/AMDGPU/pal-metadata-3.6.ll

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -62,6 +62,7 @@
6262
; CHECK-NEXT: .entry_point_symbol: _amdgpu_cs_main
6363
; CHECK-NEXT: .excp_en: 0
6464
; CHECK-NEXT: .float_mode: 0xc0
65+
; CHECK-NEXT: .forward_progress: true
6566
; GFX11-NEXT: .ieee_mode: false
6667
; CHECK-NEXT: .image_op: false
6768
; CHECK-NEXT: .lds_size: 0
@@ -118,6 +119,7 @@
118119
; CHECK-NEXT: .debug_mode: false
119120
; CHECK-NOT: .entry_point: _amdgpu_gs_main
120121
; CHECK-NEXT: .entry_point_symbol: gs_shader
122+
; CHECK-NEXT: .forward_progress: true
121123
; GFX11-NEXT: .ieee_mode: false
122124
; CHECK-NEXT: .lds_size: 0x200
123125
; CHECK-NEXT: .mem_ordered: true
@@ -130,6 +132,7 @@
130132
; CHECK-NEXT: .debug_mode: false
131133
; CHECK-NOT: .entry_point: _amdgpu_hs_main
132134
; CHECK-NEXT: .entry_point_symbol: hs_shader
135+
; CHECK-NEXT: .forward_progress: true
133136
; GFX11-NEXT: .ieee_mode: false
134137
; CHECK-NEXT: .lds_size: 0x1000
135138
; CHECK-NEXT: .mem_ordered: true
@@ -142,6 +145,7 @@
142145
; CHECK-NEXT: .debug_mode: false
143146
; CHECK-NOT: .entry_point: _amdgpu_ps_main
144147
; CHECK-NEXT: .entry_point_symbol: ps_shader
148+
; CHECK-NEXT: .forward_progress: true
145149
; GFX11-NEXT: .ieee_mode: false
146150
; CHECK-NEXT: .lds_size: 0
147151
; CHECK-NEXT: .mem_ordered: true

0 commit comments

Comments
 (0)