Skip to content

Commit 9a44d96

Browse files
committed
Precommit test using poison
This is going to look better when using amdgcn.dead instead.
1 parent 99e1308 commit 9a44d96

File tree

1 file changed

+133
-0
lines changed

1 file changed

+133
-0
lines changed

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.whole.wave-w32.ll

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1115,4 +1115,137 @@ tail:
11151115
unreachable
11161116
}
11171117

1118+
; Since functions that contain amdgcn.init.whole.wave do not preserve the inactive
1119+
; lanes of any VGPRs, the middle end will explicitly preserve them if needed by adding
1120+
; dummy VGPR arguments. Since only the inactive lanes are important, we need to make
1121+
; it clear to the backend that it's safe to allocate v9 inside shader.
1122+
; FIXME: Using poison for the inactive VGPR is not clear enough: the checks below still allocate v10-v12 inside shader and never reuse v9.
1123+
define amdgpu_cs_chain void @with_inactive_vgprs(ptr inreg %callee, i32 inreg %exec, i32 inreg %sgpr, i32 %active.vgpr, i32 %inactive.vgpr) {
1124+
; GISEL12-LABEL: with_inactive_vgprs:
1125+
; GISEL12: ; %bb.0: ; %entry
1126+
; GISEL12-NEXT: s_wait_loadcnt_dscnt 0x0
1127+
; GISEL12-NEXT: s_wait_expcnt 0x0
1128+
; GISEL12-NEXT: s_wait_samplecnt 0x0
1129+
; GISEL12-NEXT: s_wait_bvhcnt 0x0
1130+
; GISEL12-NEXT: s_wait_kmcnt 0x0
1131+
; GISEL12-NEXT: s_or_saveexec_b32 s6, -1
1132+
; GISEL12-NEXT: s_mov_b32 s4, s0
1133+
; GISEL12-NEXT: s_mov_b32 s5, s1
1134+
; GISEL12-NEXT: s_mov_b32 s0, s3
1135+
; GISEL12-NEXT: s_wait_alu 0xfffe
1136+
; GISEL12-NEXT: s_and_saveexec_b32 s1, s6
1137+
; GISEL12-NEXT: s_cbranch_execz .LBB6_2
1138+
; GISEL12-NEXT: ; %bb.1: ; %shader
1139+
; GISEL12-NEXT: v_dual_mov_b32 v11, s5 :: v_dual_mov_b32 v10, s4
1140+
; GISEL12-NEXT: flat_load_b32 v12, v[10:11]
1141+
; GISEL12-NEXT: ;;#ASMSTART
1142+
; GISEL12-NEXT: ; use v0-7
1143+
; GISEL12-NEXT: ;;#ASMEND
1144+
; GISEL12-NEXT: s_wait_loadcnt_dscnt 0x0
1145+
; GISEL12-NEXT: v_add_nc_u32_e32 v8, v8, v12
1146+
; GISEL12-NEXT: flat_store_b32 v[10:11], v12
1147+
; GISEL12-NEXT: .LBB6_2: ; %tail.block
1148+
; GISEL12-NEXT: s_wait_alu 0xfffe
1149+
; GISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s1
1150+
; GISEL12-NEXT: s_mov_b32 exec_lo, s2
1151+
; GISEL12-NEXT: s_wait_alu 0xfffe
1152+
; GISEL12-NEXT: s_setpc_b64 s[4:5]
1153+
;
1154+
; DAGISEL12-LABEL: with_inactive_vgprs:
1155+
; DAGISEL12: ; %bb.0: ; %entry
1156+
; DAGISEL12-NEXT: s_wait_loadcnt_dscnt 0x0
1157+
; DAGISEL12-NEXT: s_wait_expcnt 0x0
1158+
; DAGISEL12-NEXT: s_wait_samplecnt 0x0
1159+
; DAGISEL12-NEXT: s_wait_bvhcnt 0x0
1160+
; DAGISEL12-NEXT: s_wait_kmcnt 0x0
1161+
; DAGISEL12-NEXT: s_or_saveexec_b32 s6, -1
1162+
; DAGISEL12-NEXT: s_mov_b32 s5, s1
1163+
; DAGISEL12-NEXT: s_mov_b32 s4, s0
1164+
; DAGISEL12-NEXT: s_wait_alu 0xfffe
1165+
; DAGISEL12-NEXT: s_and_saveexec_b32 s0, s6
1166+
; DAGISEL12-NEXT: s_cbranch_execz .LBB6_2
1167+
; DAGISEL12-NEXT: ; %bb.1: ; %shader
1168+
; DAGISEL12-NEXT: v_dual_mov_b32 v11, s5 :: v_dual_mov_b32 v10, s4
1169+
; DAGISEL12-NEXT: flat_load_b32 v12, v[10:11]
1170+
; DAGISEL12-NEXT: ;;#ASMSTART
1171+
; DAGISEL12-NEXT: ; use v0-7
1172+
; DAGISEL12-NEXT: ;;#ASMEND
1173+
; DAGISEL12-NEXT: s_wait_loadcnt_dscnt 0x0
1174+
; DAGISEL12-NEXT: v_add_nc_u32_e32 v8, v8, v12
1175+
; DAGISEL12-NEXT: flat_store_b32 v[10:11], v12
1176+
; DAGISEL12-NEXT: .LBB6_2: ; %tail.block
1177+
; DAGISEL12-NEXT: s_wait_alu 0xfffe
1178+
; DAGISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s0
1179+
; DAGISEL12-NEXT: s_mov_b32 s0, s3
1180+
; DAGISEL12-NEXT: s_mov_b32 exec_lo, s2
1181+
; DAGISEL12-NEXT: s_wait_alu 0xfffe
1182+
; DAGISEL12-NEXT: s_setpc_b64 s[4:5]
1183+
;
1184+
; GISEL10-LABEL: with_inactive_vgprs:
1185+
; GISEL10: ; %bb.0: ; %entry
1186+
; GISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1187+
; GISEL10-NEXT: s_or_saveexec_b32 s6, -1
1188+
; GISEL10-NEXT: s_mov_b32 s4, s0
1189+
; GISEL10-NEXT: s_mov_b32 s5, s1
1190+
; GISEL10-NEXT: s_mov_b32 s0, s3
1191+
; GISEL10-NEXT: s_and_saveexec_b32 s1, s6
1192+
; GISEL10-NEXT: s_cbranch_execz .LBB6_2
1193+
; GISEL10-NEXT: ; %bb.1: ; %shader
1194+
; GISEL10-NEXT: v_mov_b32_e32 v11, s5
1195+
; GISEL10-NEXT: v_mov_b32_e32 v10, s4
1196+
; GISEL10-NEXT: flat_load_dword v12, v[10:11]
1197+
; GISEL10-NEXT: ;;#ASMSTART
1198+
; GISEL10-NEXT: ; use v0-7
1199+
; GISEL10-NEXT: ;;#ASMEND
1200+
; GISEL10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1201+
; GISEL10-NEXT: v_add_nc_u32_e32 v8, v8, v12
1202+
; GISEL10-NEXT: flat_store_dword v[10:11], v12
1203+
; GISEL10-NEXT: .LBB6_2: ; %tail.block
1204+
; GISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s1
1205+
; GISEL10-NEXT: s_mov_b32 exec_lo, s2
1206+
; GISEL10-NEXT: s_setpc_b64 s[4:5]
1207+
;
1208+
; DAGISEL10-LABEL: with_inactive_vgprs:
1209+
; DAGISEL10: ; %bb.0: ; %entry
1210+
; DAGISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1211+
; DAGISEL10-NEXT: s_or_saveexec_b32 s6, -1
1212+
; DAGISEL10-NEXT: s_mov_b32 s5, s1
1213+
; DAGISEL10-NEXT: s_mov_b32 s4, s0
1214+
; DAGISEL10-NEXT: s_and_saveexec_b32 s0, s6
1215+
; DAGISEL10-NEXT: s_cbranch_execz .LBB6_2
1216+
; DAGISEL10-NEXT: ; %bb.1: ; %shader
1217+
; DAGISEL10-NEXT: v_mov_b32_e32 v11, s5
1218+
; DAGISEL10-NEXT: v_mov_b32_e32 v10, s4
1219+
; DAGISEL10-NEXT: flat_load_dword v12, v[10:11]
1220+
; DAGISEL10-NEXT: ;;#ASMSTART
1221+
; DAGISEL10-NEXT: ; use v0-7
1222+
; DAGISEL10-NEXT: ;;#ASMEND
1223+
; DAGISEL10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1224+
; DAGISEL10-NEXT: v_add_nc_u32_e32 v8, v8, v12
1225+
; DAGISEL10-NEXT: flat_store_dword v[10:11], v12
1226+
; DAGISEL10-NEXT: .LBB6_2: ; %tail.block
1227+
; DAGISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s0
1228+
; DAGISEL10-NEXT: s_mov_b32 s0, s3
1229+
; DAGISEL10-NEXT: s_mov_b32 exec_lo, s2
1230+
; DAGISEL10-NEXT: s_setpc_b64 s[4:5]
1231+
entry:
1232+
; Branch to %shader when llvm.amdgcn.init.whole.wave returns true; otherwise
; go straight to %tail.block.
%1 = call i1 @llvm.amdgcn.init.whole.wave()
1233+
br i1 %1, label %shader, label %tail.block
1234+
1235+
shader: ; preds = %entry
1236+
%use.another.vgpr = load i32, ptr %callee ; something that won't be moved past the inline asm
1237+
; Side-effecting inline asm whose constraint string clobbers v0-v7.
; NOTE(review): presumably this keeps the loaded value out of v0-v7 so it
; needs a further VGPR - confirm against the intent of the test.
call void asm sideeffect "; use v0-7", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"()
1238+
store i32 %use.another.vgpr, ptr %callee
1239+
%active.vgpr.new = add i32 %active.vgpr, %use.another.vgpr
1240+
br label %tail.block
1241+
1242+
tail.block: ; preds = %shader, %entry
1243+
%active.vgpr.arg = phi i32 [ %active.vgpr, %entry ], [ %active.vgpr.new, %shader ]
1244+
; On the %shader path the inactive VGPR value is poison; the incoming
; argument is only forwarded unchanged on the edge from %entry.
%inactive.vgpr.arg = phi i32 [ %inactive.vgpr, %entry ], [ poison, %shader ]
1245+
%vgprs.0 = insertvalue { i32, i32 } poison, i32 %active.vgpr.arg, 0
1246+
%vgprs = insertvalue { i32, i32 } %vgprs.0, i32 %inactive.vgpr.arg, 1
1247+
; Chain to %callee, forwarding both VGPR values packed in a { i32, i32 } struct.
call void (ptr, i32, i32, { i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.i32.sl_i32i32(ptr inreg %callee, i32 inreg %exec, i32 inreg %sgpr, { i32, i32} %vgprs, i32 0)
1248+
unreachable
1249+
}
1250+
11181251
declare amdgpu_gfx <16 x i32> @write_v0_v15(<16 x i32>)

0 commit comments

Comments
 (0)