@@ -5163,6 +5163,211 @@ define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 {
51635163}
51645164
51655165define amdgpu_kernel void @test_call_external_void_func_v32i32 () #0 {
5166+ ; VI-LABEL: test_call_external_void_func_v32i32:
5167+ ; VI: ; %bb.0:
5168+ ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
5169+ ; VI-NEXT: s_mov_b32 s7, 0xf000
5170+ ; VI-NEXT: s_mov_b32 s6, -1
5171+ ; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
5172+ ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
5173+ ; VI-NEXT: s_waitcnt lgkmcnt(0)
5174+ ; VI-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
5175+ ; VI-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
5176+ ; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
5177+ ; VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
5178+ ; VI-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
5179+ ; VI-NEXT: buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
5180+ ; VI-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64
5181+ ; VI-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
5182+ ; VI-NEXT: s_mov_b32 s38, -1
5183+ ; VI-NEXT: s_mov_b32 s39, 0xe80000
5184+ ; VI-NEXT: s_add_u32 s36, s36, s3
5185+ ; VI-NEXT: s_addc_u32 s37, s37, 0
5186+ ; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
5187+ ; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
5188+ ; VI-NEXT: s_mov_b32 s32, 0
5189+ ; VI-NEXT: s_getpc_b64 s[8:9]
5190+ ; VI-NEXT: s_add_u32 s8, s8, external_void_func_v32i32@rel32@lo+4
5191+ ; VI-NEXT: s_addc_u32 s9, s9, external_void_func_v32i32@rel32@hi+12
5192+ ; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
5193+ ; VI-NEXT: s_waitcnt vmcnt(6)
5194+ ; VI-NEXT: buffer_store_dword v31, off, s[36:39], s32
5195+ ; VI-NEXT: s_swappc_b64 s[30:31], s[8:9]
5196+ ; VI-NEXT: s_endpgm
5197+ ;
5198+ ; CI-LABEL: test_call_external_void_func_v32i32:
5199+ ; CI: ; %bb.0:
5200+ ; CI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
5201+ ; CI-NEXT: s_mov_b32 s7, 0xf000
5202+ ; CI-NEXT: s_mov_b32 s6, -1
5203+ ; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
5204+ ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
5205+ ; CI-NEXT: s_waitcnt lgkmcnt(0)
5206+ ; CI-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
5207+ ; CI-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
5208+ ; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
5209+ ; CI-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
5210+ ; CI-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
5211+ ; CI-NEXT: buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
5212+ ; CI-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64
5213+ ; CI-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
5214+ ; CI-NEXT: s_mov_b32 s38, -1
5215+ ; CI-NEXT: s_mov_b32 s39, 0xe8f000
5216+ ; CI-NEXT: s_add_u32 s36, s36, s3
5217+ ; CI-NEXT: s_addc_u32 s37, s37, 0
5218+ ; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
5219+ ; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
5220+ ; CI-NEXT: s_mov_b32 s32, 0
5221+ ; CI-NEXT: s_getpc_b64 s[8:9]
5222+ ; CI-NEXT: s_add_u32 s8, s8, external_void_func_v32i32@rel32@lo+4
5223+ ; CI-NEXT: s_addc_u32 s9, s9, external_void_func_v32i32@rel32@hi+12
5224+ ; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
5225+ ; CI-NEXT: s_waitcnt vmcnt(6)
5226+ ; CI-NEXT: buffer_store_dword v31, off, s[36:39], s32
5227+ ; CI-NEXT: s_swappc_b64 s[30:31], s[8:9]
5228+ ; CI-NEXT: s_endpgm
5229+ ;
5230+ ; SDAG-LABEL: test_call_external_void_func_v32i32:
5231+ ; SDAG: ; %bb.0:
5232+ ; SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
5233+ ; SDAG-NEXT: s_mov_b32 s7, 0xf000
5234+ ; SDAG-NEXT: s_mov_b32 s6, -1
5235+ ; SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
5236+ ; SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
5237+ ; SDAG-NEXT: s_waitcnt lgkmcnt(0)
5238+ ; SDAG-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
5239+ ; SDAG-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
5240+ ; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
5241+ ; SDAG-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
5242+ ; SDAG-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
5243+ ; SDAG-NEXT: buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
5244+ ; SDAG-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64
5245+ ; SDAG-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
5246+ ; SDAG-NEXT: s_mov_b32 s38, -1
5247+ ; SDAG-NEXT: s_mov_b32 s39, 0xe00000
5248+ ; SDAG-NEXT: s_add_u32 s36, s36, s3
5249+ ; SDAG-NEXT: s_addc_u32 s37, s37, 0
5250+ ; SDAG-NEXT: s_mov_b64 s[6:7], s[0:1]
5251+ ; SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
5252+ ; SDAG-NEXT: s_mov_b32 s32, 0
5253+ ; SDAG-NEXT: s_getpc_b64 s[8:9]
5254+ ; SDAG-NEXT: s_add_u32 s8, s8, external_void_func_v32i32@rel32@lo+4
5255+ ; SDAG-NEXT: s_addc_u32 s9, s9, external_void_func_v32i32@rel32@hi+12
5256+ ; SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
5257+ ; SDAG-NEXT: s_waitcnt vmcnt(6)
5258+ ; SDAG-NEXT: buffer_store_dword v31, off, s[36:39], s32
5259+ ; SDAG-NEXT: s_swappc_b64 s[30:31], s[8:9]
5260+ ; SDAG-NEXT: s_endpgm
5261+ ;
5262+ ; GFX11-LABEL: test_call_external_void_func_v32i32:
5263+ ; GFX11: ; %bb.0:
5264+ ; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0
5265+ ; GFX11-NEXT: s_mov_b32 s7, 0x31016000
5266+ ; GFX11-NEXT: s_mov_b32 s6, -1
5267+ ; GFX11-NEXT: s_mov_b32 s32, 0
5268+ ; GFX11-NEXT: s_getpc_b64 s[2:3]
5269+ ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v32i32@rel32@lo+4
5270+ ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v32i32@rel32@hi+12
5271+ ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
5272+ ; GFX11-NEXT: s_clause 0x7
5273+ ; GFX11-NEXT: buffer_load_b128 v[28:31], off, s[4:7], 0 offset:112
5274+ ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[4:7], 0
5275+ ; GFX11-NEXT: buffer_load_b128 v[4:7], off, s[4:7], 0 offset:16
5276+ ; GFX11-NEXT: buffer_load_b128 v[8:11], off, s[4:7], 0 offset:32
5277+ ; GFX11-NEXT: buffer_load_b128 v[12:15], off, s[4:7], 0 offset:48
5278+ ; GFX11-NEXT: buffer_load_b128 v[16:19], off, s[4:7], 0 offset:64
5279+ ; GFX11-NEXT: buffer_load_b128 v[20:23], off, s[4:7], 0 offset:80
5280+ ; GFX11-NEXT: buffer_load_b128 v[24:27], off, s[4:7], 0 offset:96
5281+ ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
5282+ ; GFX11-NEXT: s_waitcnt vmcnt(7)
5283+ ; GFX11-NEXT: scratch_store_b32 off, v31, s32
5284+ ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
5285+ ; GFX11-NEXT: s_endpgm
5286+ ;
5287+ ; HSA-LABEL: test_call_external_void_func_v32i32:
5288+ ; HSA: ; %bb.0:
5289+ ; HSA-NEXT: s_add_i32 s6, s6, s9
5290+ ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
5291+ ; HSA-NEXT: s_add_u32 s0, s0, s9
5292+ ; HSA-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
5293+ ; HSA-NEXT: s_mov_b32 s11, 0x1100f000
5294+ ; HSA-NEXT: s_mov_b32 s10, -1
5295+ ; HSA-NEXT: s_addc_u32 s1, s1, 0
5296+ ; HSA-NEXT: s_waitcnt lgkmcnt(0)
5297+ ; HSA-NEXT: buffer_load_dwordx4 v[28:31], off, s[8:11], 0 offset:112
5298+ ; HSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
5299+ ; HSA-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
5300+ ; HSA-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32
5301+ ; HSA-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48
5302+ ; HSA-NEXT: buffer_load_dwordx4 v[16:19], off, s[8:11], 0 offset:64
5303+ ; HSA-NEXT: buffer_load_dwordx4 v[20:23], off, s[8:11], 0 offset:80
5304+ ; HSA-NEXT: buffer_load_dwordx4 v[24:27], off, s[8:11], 0 offset:96
5305+ ; HSA-NEXT: s_mov_b32 s32, 0
5306+ ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7
5307+ ; HSA-NEXT: s_getpc_b64 s[12:13]
5308+ ; HSA-NEXT: s_add_u32 s12, s12, external_void_func_v32i32@rel32@lo+4
5309+ ; HSA-NEXT: s_addc_u32 s13, s13, external_void_func_v32i32@rel32@hi+12
5310+ ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
5311+ ; HSA-NEXT: s_waitcnt vmcnt(7)
5312+ ; HSA-NEXT: buffer_store_dword v31, off, s[0:3], s32
5313+ ; HSA-NEXT: s_swappc_b64 s[30:31], s[12:13]
5314+ ; HSA-NEXT: s_endpgm
5315+ ;
5316+ ; GISEL-LABEL: test_call_external_void_func_v32i32:
5317+ ; GISEL: ; %bb.0:
5318+ ; GISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
5319+ ; GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
5320+ ; GISEL-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0
5321+ ; GISEL-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1
5322+ ; GISEL-NEXT: s_mov_b32 s54, -1
5323+ ; GISEL-NEXT: s_waitcnt lgkmcnt(0)
5324+ ; GISEL-NEXT: s_load_dwordx16 s[8:23], s[0:1], 0x40
5325+ ; GISEL-NEXT: s_load_dwordx16 s[36:51], s[0:1], 0x0
5326+ ; GISEL-NEXT: s_mov_b32 s55, 0xe00000
5327+ ; GISEL-NEXT: s_add_u32 s52, s52, s3
5328+ ; GISEL-NEXT: s_addc_u32 s53, s53, 0
5329+ ; GISEL-NEXT: s_mov_b32 s32, 0
5330+ ; GISEL-NEXT: s_waitcnt lgkmcnt(0)
5331+ ; GISEL-NEXT: v_mov_b32_e32 v0, s23
5332+ ; GISEL-NEXT: s_mov_b64 s[0:1], s[52:53]
5333+ ; GISEL-NEXT: s_getpc_b64 s[4:5]
5334+ ; GISEL-NEXT: s_add_u32 s4, s4, external_void_func_v32i32@rel32@lo+4
5335+ ; GISEL-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32@rel32@hi+12
5336+ ; GISEL-NEXT: buffer_store_dword v0, off, s[52:55], s32
5337+ ; GISEL-NEXT: v_mov_b32_e32 v0, s36
5338+ ; GISEL-NEXT: v_mov_b32_e32 v1, s37
5339+ ; GISEL-NEXT: v_mov_b32_e32 v2, s38
5340+ ; GISEL-NEXT: v_mov_b32_e32 v3, s39
5341+ ; GISEL-NEXT: v_mov_b32_e32 v4, s40
5342+ ; GISEL-NEXT: v_mov_b32_e32 v5, s41
5343+ ; GISEL-NEXT: v_mov_b32_e32 v6, s42
5344+ ; GISEL-NEXT: v_mov_b32_e32 v7, s43
5345+ ; GISEL-NEXT: v_mov_b32_e32 v8, s44
5346+ ; GISEL-NEXT: v_mov_b32_e32 v9, s45
5347+ ; GISEL-NEXT: v_mov_b32_e32 v10, s46
5348+ ; GISEL-NEXT: v_mov_b32_e32 v11, s47
5349+ ; GISEL-NEXT: v_mov_b32_e32 v12, s48
5350+ ; GISEL-NEXT: v_mov_b32_e32 v13, s49
5351+ ; GISEL-NEXT: v_mov_b32_e32 v14, s50
5352+ ; GISEL-NEXT: v_mov_b32_e32 v15, s51
5353+ ; GISEL-NEXT: v_mov_b32_e32 v16, s8
5354+ ; GISEL-NEXT: v_mov_b32_e32 v17, s9
5355+ ; GISEL-NEXT: v_mov_b32_e32 v18, s10
5356+ ; GISEL-NEXT: v_mov_b32_e32 v19, s11
5357+ ; GISEL-NEXT: s_mov_b64 s[2:3], s[54:55]
5358+ ; GISEL-NEXT: v_mov_b32_e32 v20, s12
5359+ ; GISEL-NEXT: v_mov_b32_e32 v21, s13
5360+ ; GISEL-NEXT: v_mov_b32_e32 v22, s14
5361+ ; GISEL-NEXT: v_mov_b32_e32 v23, s15
5362+ ; GISEL-NEXT: v_mov_b32_e32 v24, s16
5363+ ; GISEL-NEXT: v_mov_b32_e32 v25, s17
5364+ ; GISEL-NEXT: v_mov_b32_e32 v26, s18
5365+ ; GISEL-NEXT: v_mov_b32_e32 v27, s19
5366+ ; GISEL-NEXT: v_mov_b32_e32 v28, s20
5367+ ; GISEL-NEXT: v_mov_b32_e32 v29, s21
5368+ ; GISEL-NEXT: v_mov_b32_e32 v30, s22
5369+ ; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
5370+ ; GISEL-NEXT: s_endpgm
51665371 %ptr = load ptr addrspace (1 ), ptr addrspace (4 ) poison
51675372 %val = load <32 x i32 >, ptr addrspace (1 ) %ptr
51685373 call void @external_void_func_v32i32 (<32 x i32 > %val )
@@ -5343,11 +5548,11 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
53435548; GISEL-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0
53445549; GISEL-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1
53455550; GISEL-NEXT: s_mov_b32 s54, -1
5346- ; GISEL-NEXT: s_mov_b32 s55, 0xe00000
53475551; GISEL-NEXT: s_waitcnt lgkmcnt(0)
5348- ; GISEL-NEXT: s_load_dword s2, s[0:1], 0x0
53495552; GISEL-NEXT: s_load_dwordx16 s[8:23], s[0:1], 0x40
53505553; GISEL-NEXT: s_load_dwordx16 s[36:51], s[0:1], 0x0
5554+ ; GISEL-NEXT: s_load_dword s2, s[0:1], 0x0
5555+ ; GISEL-NEXT: s_mov_b32 s55, 0xe00000
53515556; GISEL-NEXT: s_add_u32 s52, s52, s5
53525557; GISEL-NEXT: s_mov_b32 s32, 0
53535558; GISEL-NEXT: s_addc_u32 s53, s53, 0
0 commit comments