@@ -1312,8 +1312,8 @@ main_body:
13121312 ret void
13131313}
13141314
1315- define amdgpu_ps void @raw_buffer_load_x1_offset_swizzled_not_merged (<4 x i32 > inreg %rsrc ) {
1316- ; PREGFX10-LABEL: raw_buffer_load_x1_offset_swizzled_not_merged :
1315+ define amdgpu_ps void @raw_buffer_load_x1_offset_swizzled_not_merged_pregfx12 (<4 x i32 > inreg %rsrc ) {
1316+ ; PREGFX10-LABEL: raw_buffer_load_x1_offset_swizzled_not_merged_pregfx12 :
13171317; PREGFX10: ; %bb.0: ; %main_body
13181318; PREGFX10-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:4
13191319; PREGFX10-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:8
@@ -1327,7 +1327,7 @@ define amdgpu_ps void @raw_buffer_load_x1_offset_swizzled_not_merged(<4 x i32> i
13271327; PREGFX10-NEXT: exp mrt0 v4, v5, v0, v0 done vm
13281328; PREGFX10-NEXT: s_endpgm
13291329;
1330- ; GFX10-LABEL: raw_buffer_load_x1_offset_swizzled_not_merged :
1330+ ; GFX10-LABEL: raw_buffer_load_x1_offset_swizzled_not_merged_pregfx12 :
13311331; GFX10: ; %bb.0: ; %main_body
13321332; GFX10-NEXT: s_clause 0x5
13331333; GFX10-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:4
@@ -1342,7 +1342,7 @@ define amdgpu_ps void @raw_buffer_load_x1_offset_swizzled_not_merged(<4 x i32> i
13421342; GFX10-NEXT: exp mrt0 v4, v5, v0, v0 done vm
13431343; GFX10-NEXT: s_endpgm
13441344;
1345- ; GFX11-LABEL: raw_buffer_load_x1_offset_swizzled_not_merged :
1345+ ; GFX11-LABEL: raw_buffer_load_x1_offset_swizzled_not_merged_pregfx12 :
13461346; GFX11: ; %bb.0: ; %main_body
13471347; GFX11-NEXT: s_clause 0x5
13481348; GFX11-NEXT: buffer_load_b32 v0, off, s[0:3], 0 offset:4
@@ -1357,7 +1357,7 @@ define amdgpu_ps void @raw_buffer_load_x1_offset_swizzled_not_merged(<4 x i32> i
13571357; GFX11-NEXT: exp mrt0 v4, v5, v0, v0 done
13581358; GFX11-NEXT: s_endpgm
13591359;
1360- ; GFX12-LABEL: raw_buffer_load_x1_offset_swizzled_not_merged :
1360+ ; GFX12-LABEL: raw_buffer_load_x1_offset_swizzled_not_merged_pregfx12 :
13611361; GFX12: ; %bb.0: ; %main_body
13621362; GFX12-NEXT: s_clause 0x1
13631363; GFX12-NEXT: buffer_load_b128 v[0:3], off, s[0:3], null offset:4 scope:SCOPE_SE
@@ -1379,6 +1379,65 @@ main_body:
13791379 ret void
13801380}
13811381
; Six f32 raw buffer loads from %rsrc at offsets 4, 8, 12, 16, 28 and 32, each
; passing 64 as the final i32 operand, feeding two exports.
; The assertions below expect the PREGFX10, GFX10 and GFX11 runs to combine the
; loads into one four-dword load at offset 4 plus one two-dword load at offset
; 28, while the GFX12 run keeps six separate single-dword loads.
; NOTE(review): 64 is presumably the swizzle bit in the GFX12 encoding of the
; last operand, which would explain the "swizzled_not_merged" name - confirm
; against the intrinsic documentation.
1382+ define amdgpu_ps void @raw_buffer_load_x1_offset_swizzled_not_merged (<4 x i32 > inreg %rsrc ) {
1383+ ; PREGFX10-LABEL: raw_buffer_load_x1_offset_swizzled_not_merged:
1384+ ; PREGFX10: ; %bb.0: ; %main_body
1385+ ; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:4
1386+ ; PREGFX10-NEXT: buffer_load_dwordx2 v[4:5], off, s[0:3], 0 offset:28
1387+ ; PREGFX10-NEXT: s_waitcnt vmcnt(1)
1388+ ; PREGFX10-NEXT: exp mrt0 v0, v1, v2, v3 done vm
1389+ ; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1390+ ; PREGFX10-NEXT: exp mrt0 v4, v5, v0, v0 done vm
1391+ ; PREGFX10-NEXT: s_endpgm
1392+ ;
1393+ ; GFX10-LABEL: raw_buffer_load_x1_offset_swizzled_not_merged:
1394+ ; GFX10: ; %bb.0: ; %main_body
1395+ ; GFX10-NEXT: s_clause 0x1
1396+ ; GFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:4
1397+ ; GFX10-NEXT: buffer_load_dwordx2 v[4:5], off, s[0:3], 0 offset:28
1398+ ; GFX10-NEXT: s_waitcnt vmcnt(1)
1399+ ; GFX10-NEXT: exp mrt0 v0, v1, v2, v3 done vm
1400+ ; GFX10-NEXT: s_waitcnt vmcnt(0)
1401+ ; GFX10-NEXT: exp mrt0 v4, v5, v0, v0 done vm
1402+ ; GFX10-NEXT: s_endpgm
1403+ ;
1404+ ; GFX11-LABEL: raw_buffer_load_x1_offset_swizzled_not_merged:
1405+ ; GFX11: ; %bb.0: ; %main_body
1406+ ; GFX11-NEXT: s_clause 0x1
1407+ ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 offset:4
1408+ ; GFX11-NEXT: buffer_load_b64 v[4:5], off, s[0:3], 0 offset:28
1409+ ; GFX11-NEXT: s_waitcnt vmcnt(1)
1410+ ; GFX11-NEXT: exp mrt0 v0, v1, v2, v3 done
1411+ ; GFX11-NEXT: s_waitcnt vmcnt(0)
1412+ ; GFX11-NEXT: exp mrt0 v4, v5, v0, v0 done
1413+ ; GFX11-NEXT: s_endpgm
1414+ ;
1415+ ; GFX12-LABEL: raw_buffer_load_x1_offset_swizzled_not_merged:
1416+ ; GFX12: ; %bb.0: ; %main_body
1417+ ; GFX12-NEXT: s_clause 0x5
1418+ ; GFX12-NEXT: buffer_load_b32 v0, off, s[0:3], null offset:4
1419+ ; GFX12-NEXT: buffer_load_b32 v1, off, s[0:3], null offset:8
1420+ ; GFX12-NEXT: buffer_load_b32 v2, off, s[0:3], null offset:12
1421+ ; GFX12-NEXT: buffer_load_b32 v3, off, s[0:3], null offset:16
1422+ ; GFX12-NEXT: buffer_load_b32 v4, off, s[0:3], null offset:28
1423+ ; GFX12-NEXT: buffer_load_b32 v5, off, s[0:3], null offset:32
1424+ ; GFX12-NEXT: s_wait_loadcnt 0x2
1425+ ; GFX12-NEXT: export mrt0 v0, v1, v2, v3 done
1426+ ; GFX12-NEXT: s_wait_loadcnt 0x0
1427+ ; GFX12-NEXT: export mrt0 v4, v5, v0, v0 done
1428+ ; GFX12-NEXT: s_endpgm
1429+ main_body:
; Offsets 4, 8, 12, 16 are four adjacent dwords; 28 and 32 are a second adjacent
; pair, separated from the first group by a gap.
1430+ %r1 = call float @llvm.amdgcn.raw.buffer.load.f32 (<4 x i32 > %rsrc , i32 4 , i32 0 , i32 64 )
1431+ %r2 = call float @llvm.amdgcn.raw.buffer.load.f32 (<4 x i32 > %rsrc , i32 8 , i32 0 , i32 64 )
1432+ %r3 = call float @llvm.amdgcn.raw.buffer.load.f32 (<4 x i32 > %rsrc , i32 12 , i32 0 , i32 64 )
1433+ %r4 = call float @llvm.amdgcn.raw.buffer.load.f32 (<4 x i32 > %rsrc , i32 16 , i32 0 , i32 64 )
1434+ %r5 = call float @llvm.amdgcn.raw.buffer.load.f32 (<4 x i32 > %rsrc , i32 28 , i32 0 , i32 64 )
1435+ %r6 = call float @llvm.amdgcn.raw.buffer.load.f32 (<4 x i32 > %rsrc , i32 32 , i32 0 , i32 64 )
; The first export consumes %r1-%r4; the second consumes %r5 and %r6, with the
; remaining two operands left undef.
1436+ call void @llvm.amdgcn.exp.f32 (i32 0 , i32 15 , float %r1 , float %r2 , float %r3 , float %r4 , i1 true , i1 true )
1437+ call void @llvm.amdgcn.exp.f32 (i32 0 , i32 15 , float %r5 , float %r6 , float undef , float undef , i1 true , i1 true )
1438+ ret void
1439+ }
1440+
13821441declare float @llvm.amdgcn.raw.buffer.load.f32 (<4 x i32 >, i32 , i32 , i32 ) #0
13831442declare <2 x float > @llvm.amdgcn.raw.buffer.load.v2f32 (<4 x i32 >, i32 , i32 , i32 ) #0
13841443declare <4 x float > @llvm.amdgcn.raw.buffer.load.v4f32 (<4 x i32 >, i32 , i32 , i32 ) #0
0 commit comments