@@ -1327,43 +1327,36 @@ define amdgpu_ps void @load_uniform_P4_i16_b16_gfx12(ptr addrspace(4) inreg %ptr
13271327define amdgpu_ps void @load_uniform_P4_i16_b16_gfx11 (ptr addrspace (4 ) inreg %ptra , ptr addrspace (4 ) inreg %ptrb , ptr addrspace (1 ) %out ) {
13281328; GFX7-LABEL: load_uniform_P4_i16_b16_gfx11:
13291329; GFX7: ; %bb.0:
1330- ; GFX7-NEXT: s_mov_b32 s2, -1
1331- ; GFX7-NEXT: s_mov_b32 s3, 0xf000
1332- ; GFX7-NEXT: buffer_load_ushort v2, off, s[0:3], 0
1330+ ; GFX7-NEXT: s_load_dword s2, s[0:1], 0x0
13331331; GFX7-NEXT: s_load_dword s0, s[0:1], 0x0
1334- ; GFX7-NEXT: s_mov_b32 s2, 0
1335- ; GFX7-NEXT: s_waitcnt vmcnt(0)
1336- ; GFX7-NEXT: v_readfirstlane_b32 s1, v2
1332+ ; GFX7-NEXT: s_mov_b32 s3, 0xf000
13371333; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1338- ; GFX7-NEXT: s_add_i32 s0, s1 , s0
1334+ ; GFX7-NEXT: s_add_i32 s0, s2 , s0
13391335; GFX7-NEXT: v_mov_b32_e32 v2, s0
1336+ ; GFX7-NEXT: s_mov_b32 s2, 0
13401337; GFX7-NEXT: s_mov_b64 s[0:1], 0
13411338; GFX7-NEXT: buffer_store_short v2, v[0:1], s[0:3], 0 addr64
13421339; GFX7-NEXT: s_endpgm
13431340;
13441341; GFX11-True16-LABEL: load_uniform_P4_i16_b16_gfx11:
13451342; GFX11-True16: ; %bb.0:
1346- ; GFX11-True16-NEXT: v_mov_b32_e32 v2, 0
1347- ; GFX11-True16-NEXT: global_load_d16_b16 v2, v2, s[0:1]
1343+ ; GFX11-True16-NEXT: s_clause 0x1
1344+ ; GFX11-True16-NEXT: s_load_b32 s2, s[0:1], 0x0
13481345; GFX11-True16-NEXT: s_load_b32 s0, s[0:1], 0x0
1349- ; GFX11-True16-NEXT: s_waitcnt vmcnt(0)
1350- ; GFX11-True16-NEXT: v_readfirstlane_b32 s1, v2
13511346; GFX11-True16-NEXT: s_waitcnt lgkmcnt(0)
1352- ; GFX11-True16-NEXT: s_add_i32 s0, s1 , s0
1347+ ; GFX11-True16-NEXT: s_add_i32 s0, s2 , s0
13531348; GFX11-True16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
13541349; GFX11-True16-NEXT: v_mov_b16_e32 v2.l, s0
13551350; GFX11-True16-NEXT: global_store_b16 v[0:1], v2, off
13561351; GFX11-True16-NEXT: s_endpgm
13571352;
13581353; GFX11-NoTrue16-LABEL: load_uniform_P4_i16_b16_gfx11:
13591354; GFX11-NoTrue16: ; %bb.0:
1360- ; GFX11-NoTrue16-NEXT: v_mov_b32_e32 v2, 0
1361- ; GFX11-NoTrue16-NEXT: global_load_u16 v2, v2, s[0:1]
1355+ ; GFX11-NoTrue16-NEXT: s_clause 0x1
1356+ ; GFX11-NoTrue16-NEXT: s_load_b32 s2, s[0:1], 0x0
13621357; GFX11-NoTrue16-NEXT: s_load_b32 s0, s[0:1], 0x0
1363- ; GFX11-NoTrue16-NEXT: s_waitcnt vmcnt(0)
1364- ; GFX11-NoTrue16-NEXT: v_readfirstlane_b32 s1, v2
13651358; GFX11-NoTrue16-NEXT: s_waitcnt lgkmcnt(0)
1366- ; GFX11-NoTrue16-NEXT: s_add_i32 s0, s1 , s0
1359+ ; GFX11-NoTrue16-NEXT: s_add_i32 s0, s2 , s0
13671360; GFX11-NoTrue16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
13681361; GFX11-NoTrue16-NEXT: v_mov_b32_e32 v2, s0
13691362; GFX11-NoTrue16-NEXT: global_store_b16 v[0:1], v2, off
@@ -1488,43 +1481,36 @@ define amdgpu_ps void @load_uniform_P4_i16_anyextending_gfx12(ptr addrspace(4) i
14881481define amdgpu_ps void @load_uniform_P4_i16_anyextending_gfx11 (ptr addrspace (4 ) inreg %ptra , ptr addrspace (4 ) inreg %ptrb , ptr addrspace (1 ) %out ) {
14891482; GFX7-LABEL: load_uniform_P4_i16_anyextending_gfx11:
14901483; GFX7: ; %bb.0:
1491- ; GFX7-NEXT: s_mov_b32 s2, -1
1492- ; GFX7-NEXT: s_mov_b32 s3, 0xf000
1493- ; GFX7-NEXT: buffer_load_ushort v2, off, s[0:3], 0
1484+ ; GFX7-NEXT: s_load_dword s2, s[0:1], 0x0
14941485; GFX7-NEXT: s_load_dword s0, s[0:1], 0x0
1495- ; GFX7-NEXT: s_mov_b32 s2, 0
1496- ; GFX7-NEXT: s_waitcnt vmcnt(0)
1497- ; GFX7-NEXT: v_readfirstlane_b32 s1, v2
1486+ ; GFX7-NEXT: s_mov_b32 s3, 0xf000
14981487; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1499- ; GFX7-NEXT: s_add_i32 s0, s1 , s0
1488+ ; GFX7-NEXT: s_add_i32 s0, s2 , s0
15001489; GFX7-NEXT: v_mov_b32_e32 v2, s0
1490+ ; GFX7-NEXT: s_mov_b32 s2, 0
15011491; GFX7-NEXT: s_mov_b64 s[0:1], 0
15021492; GFX7-NEXT: buffer_store_short v2, v[0:1], s[0:3], 0 addr64
15031493; GFX7-NEXT: s_endpgm
15041494;
15051495; GFX11-True16-LABEL: load_uniform_P4_i16_anyextending_gfx11:
15061496; GFX11-True16: ; %bb.0:
1507- ; GFX11-True16-NEXT: v_mov_b32_e32 v2, 0
1508- ; GFX11-True16-NEXT: global_load_d16_b16 v2, v2, s[0:1]
1497+ ; GFX11-True16-NEXT: s_clause 0x1
1498+ ; GFX11-True16-NEXT: s_load_b32 s2, s[0:1], 0x0
15091499; GFX11-True16-NEXT: s_load_b32 s0, s[0:1], 0x0
1510- ; GFX11-True16-NEXT: s_waitcnt vmcnt(0)
1511- ; GFX11-True16-NEXT: v_readfirstlane_b32 s1, v2
15121500; GFX11-True16-NEXT: s_waitcnt lgkmcnt(0)
1513- ; GFX11-True16-NEXT: s_add_i32 s0, s1 , s0
1501+ ; GFX11-True16-NEXT: s_add_i32 s0, s2 , s0
15141502; GFX11-True16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
15151503; GFX11-True16-NEXT: v_mov_b16_e32 v2.l, s0
15161504; GFX11-True16-NEXT: global_store_b16 v[0:1], v2, off
15171505; GFX11-True16-NEXT: s_endpgm
15181506;
15191507; GFX11-NoTrue16-LABEL: load_uniform_P4_i16_anyextending_gfx11:
15201508; GFX11-NoTrue16: ; %bb.0:
1521- ; GFX11-NoTrue16-NEXT: v_mov_b32_e32 v2, 0
1522- ; GFX11-NoTrue16-NEXT: global_load_u16 v2, v2, s[0:1]
1509+ ; GFX11-NoTrue16-NEXT: s_clause 0x1
1510+ ; GFX11-NoTrue16-NEXT: s_load_b32 s2, s[0:1], 0x0
15231511; GFX11-NoTrue16-NEXT: s_load_b32 s0, s[0:1], 0x0
1524- ; GFX11-NoTrue16-NEXT: s_waitcnt vmcnt(0)
1525- ; GFX11-NoTrue16-NEXT: v_readfirstlane_b32 s1, v2
15261512; GFX11-NoTrue16-NEXT: s_waitcnt lgkmcnt(0)
1527- ; GFX11-NoTrue16-NEXT: s_add_i32 s0, s1 , s0
1513+ ; GFX11-NoTrue16-NEXT: s_add_i32 s0, s2 , s0
15281514; GFX11-NoTrue16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
15291515; GFX11-NoTrue16-NEXT: v_mov_b32_e32 v2, s0
15301516; GFX11-NoTrue16-NEXT: global_store_b16 v[0:1], v2, off
0 commit comments