Skip to content

Commit 51739cd

Browse files
committed
- Address reviewer comments
1 parent e75ab4d commit 51739cd

File tree

2 files changed

+33
-17
lines changed

2 files changed

+33
-17
lines changed

llvm/docs/AMDGPUUsage.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1717,7 +1717,7 @@ The AMDGPU backend supports the following LLVM IR attributes.
17171717

17181718
"amdgpu-sgpr-hazard-mem-wait-cull" Enable insertion of SGPR hazard cull sequences before memory waits.
17191719
Cull sequence reduces future hazard waits, but has a performance cost.
1720-
Attempt to amortize cost by overlaping with memory access.
1720+
Attempt to amortize cost by overlapping with memory accesses.
17211721

17221722
"amdgpu-sgpr-hazard-mem-wait-cull-threshold"
17231723
Sets the number of active SGPR hazards that must be present before

llvm/test/CodeGen/AMDGPU/valu-read-sgpr-hazard-attrs.mir

Lines changed: 32 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,25 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2-
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass post-RA-hazard-rec,amdgpu-wait-sgpr-hazards -o - %s | FileCheck -check-prefix=GCN %s
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass post-RA-hazard-rec,amdgpu-wait-sgpr-hazards -o - %s | FileCheck -check-prefix=GCN %s
33

44
--- |
5-
@mem = internal unnamed_addr addrspace(4) constant [4 x <4 x i32>] [<4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 0, i32 0, i32 0, i32 0>]
6-
75
define amdgpu_gs void @hazard_disable() #0 { ret void }
8-
define amdgpu_cs void @hazard_calls() #1 { ret void }
9-
define void @hazard_callee1() #1 { ret void }
10-
define void @hazard_callee2() #1 { ret void }
11-
define amdgpu_cs void @hazard_cull_vmem() #2 { ret void }
12-
define amdgpu_cs void @hazard_cull_vmem2() #3 { ret void }
13-
define amdgpu_cs void @hazard_cull_sample() #2 { ret void }
14-
define amdgpu_cs void @hazard_cull_bvh() #2 { ret void }
15-
define amdgpu_cs void @hazard_nocull_scratch() #2 { ret void }
16-
define amdgpu_cs void @hazard_cull_global() #2 { ret void }
17-
define amdgpu_cs void @hazard_nocull_flat() #2 { ret void }
6+
define amdgpu_gs void @hazard_enable() #1 { ret void }
7+
define amdgpu_cs void @hazard_calls() #2 { ret void }
8+
define void @hazard_callee1() #2 { ret void }
9+
define void @hazard_callee2() #2 { ret void }
10+
define amdgpu_cs void @hazard_cull_vmem() #3 { ret void }
11+
define amdgpu_cs void @hazard_cull_vmem2() #4 { ret void }
12+
define amdgpu_cs void @hazard_cull_sample() #3 { ret void }
13+
define amdgpu_cs void @hazard_cull_bvh() #3 { ret void }
14+
define amdgpu_cs void @hazard_nocull_scratch() #3 { ret void }
15+
define amdgpu_cs void @hazard_cull_global() #3 { ret void }
16+
define amdgpu_cs void @hazard_nocull_flat() #3 { ret void }
1817

1918
attributes #0 = { "amdgpu-sgpr-hazard-wait"="0" }
20-
attributes #1 = { "amdgpu-sgpr-hazard-boundary-cull" }
21-
attributes #2 = { "amdgpu-sgpr-hazard-mem-wait-cull" "amdgpu-sgpr-hazard-mem-wait-cull-threshold"="1" }
22-
attributes #3 = { "amdgpu-sgpr-hazard-mem-wait-cull" "amdgpu-sgpr-hazard-mem-wait-cull-threshold"="2" }
19+
attributes #1 = { "amdgpu-sgpr-hazard-wait"="1" }
20+
attributes #2 = { "amdgpu-sgpr-hazard-boundary-cull" }
21+
attributes #3 = { "amdgpu-sgpr-hazard-mem-wait-cull" "amdgpu-sgpr-hazard-mem-wait-cull-threshold"="1" }
22+
attributes #4 = { "amdgpu-sgpr-hazard-mem-wait-cull" "amdgpu-sgpr-hazard-mem-wait-cull-threshold"="2" }
2323
...
2424

2525
---
@@ -37,6 +37,22 @@ body: |
3737
S_ENDPGM 0
3838
...
3939

40+
---
41+
name: hazard_enable
42+
body: |
43+
bb.0:
44+
; GCN-LABEL: name: hazard_enable
45+
; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0, implicit $exec
46+
; GCN-NEXT: $sgpr0_sgpr1 = S_GETPC_B64
47+
; GCN-NEXT: S_WAITCNT_DEPCTR 65534
48+
; GCN-NEXT: $sgpr3 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
49+
; GCN-NEXT: S_ENDPGM 0
50+
$vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0, implicit $exec
51+
$sgpr0_sgpr1 = S_GETPC_B64
52+
$sgpr3 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
53+
S_ENDPGM 0
54+
...
55+
4056
---
4157
name: hazard_calls
4258
frameInfo:

0 commit comments

Comments
 (0)