@@ -108,15 +108,12 @@ define amdgpu_kernel void @v_pack_b32_v2f16(ptr addrspace(1) %in0, ptr addrspace
108108; GFX11-GISEL-REAL16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
109109; GFX11-GISEL-REAL16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
110110; GFX11-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
111- ; GFX11-GISEL-REAL16-NEXT: v_lshlrev_b32_e32 v0 , 1, v0
111+ ; GFX11-GISEL-REAL16-NEXT: v_lshlrev_b32_e32 v1 , 1, v0
112112; GFX11-GISEL-REAL16-NEXT: s_waitcnt lgkmcnt(0)
113- ; GFX11-GISEL-REAL16-NEXT: global_load_u16 v1, v0 , s[0:1] glc dlc
113+ ; GFX11-GISEL-REAL16-NEXT: global_load_d16_b16 v0, v1 , s[0:1] glc dlc
114114; GFX11-GISEL-REAL16-NEXT: s_waitcnt vmcnt(0)
115- ; GFX11-GISEL-REAL16-NEXT: global_load_u16 v2, v0 , s[2:3] glc dlc
115+ ; GFX11-GISEL-REAL16-NEXT: global_load_d16_hi_b16 v0, v1 , s[2:3] glc dlc
116116; GFX11-GISEL-REAL16-NEXT: s_waitcnt vmcnt(0)
117- ; GFX11-GISEL-REAL16-NEXT: v_mov_b16_e32 v0.l, v1.l
118- ; GFX11-GISEL-REAL16-NEXT: v_mov_b16_e32 v0.h, v2.l
119- ; GFX11-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
120117; GFX11-GISEL-REAL16-NEXT: v_add_f16_e32 v0.l, 2.0, v0.l
121118; GFX11-GISEL-REAL16-NEXT: v_add_f16_e32 v0.h, 2.0, v0.h
122119; GFX11-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
@@ -240,15 +237,12 @@ define amdgpu_kernel void @v_pack_b32_v2f16_sub(ptr addrspace(1) %in0, ptr addrs
240237; GFX11-GISEL-REAL16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
241238; GFX11-GISEL-REAL16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
242239; GFX11-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
243- ; GFX11-GISEL-REAL16-NEXT: v_lshlrev_b32_e32 v0 , 1, v0
240+ ; GFX11-GISEL-REAL16-NEXT: v_lshlrev_b32_e32 v1 , 1, v0
244241; GFX11-GISEL-REAL16-NEXT: s_waitcnt lgkmcnt(0)
245- ; GFX11-GISEL-REAL16-NEXT: global_load_u16 v1, v0 , s[0:1] glc dlc
242+ ; GFX11-GISEL-REAL16-NEXT: global_load_d16_b16 v0, v1 , s[0:1] glc dlc
246243; GFX11-GISEL-REAL16-NEXT: s_waitcnt vmcnt(0)
247- ; GFX11-GISEL-REAL16-NEXT: global_load_u16 v2, v0 , s[2:3] glc dlc
244+ ; GFX11-GISEL-REAL16-NEXT: global_load_d16_hi_b16 v0, v1 , s[2:3] glc dlc
248245; GFX11-GISEL-REAL16-NEXT: s_waitcnt vmcnt(0)
249- ; GFX11-GISEL-REAL16-NEXT: v_mov_b16_e32 v0.l, v1.l
250- ; GFX11-GISEL-REAL16-NEXT: v_mov_b16_e32 v0.h, v2.l
251- ; GFX11-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
252246; GFX11-GISEL-REAL16-NEXT: v_subrev_f16_e32 v0.l, 2.0, v0.l
253247; GFX11-GISEL-REAL16-NEXT: v_add_f16_e32 v0.h, 2.0, v0.h
254248; GFX11-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
@@ -486,15 +480,12 @@ define amdgpu_kernel void @v_pack_b32.fabs(ptr addrspace(1) %in0, ptr addrspace(
486480; GFX11-GISEL-REAL16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
487481; GFX11-GISEL-REAL16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
488482; GFX11-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
489- ; GFX11-GISEL-REAL16-NEXT: v_lshlrev_b32_e32 v0 , 1, v0
483+ ; GFX11-GISEL-REAL16-NEXT: v_lshlrev_b32_e32 v1 , 1, v0
490484; GFX11-GISEL-REAL16-NEXT: s_waitcnt lgkmcnt(0)
491- ; GFX11-GISEL-REAL16-NEXT: global_load_u16 v1, v0 , s[0:1] glc dlc
485+ ; GFX11-GISEL-REAL16-NEXT: global_load_d16_b16 v0, v1 , s[0:1] glc dlc
492486; GFX11-GISEL-REAL16-NEXT: s_waitcnt vmcnt(0)
493- ; GFX11-GISEL-REAL16-NEXT: global_load_u16 v2, v0 , s[2:3] glc dlc
487+ ; GFX11-GISEL-REAL16-NEXT: global_load_d16_hi_b16 v0, v1 , s[2:3] glc dlc
494488; GFX11-GISEL-REAL16-NEXT: s_waitcnt vmcnt(0)
495- ; GFX11-GISEL-REAL16-NEXT: v_mov_b16_e32 v0.l, v1.l
496- ; GFX11-GISEL-REAL16-NEXT: v_mov_b16_e32 v0.h, v2.l
497- ; GFX11-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
498489; GFX11-GISEL-REAL16-NEXT: v_add_f16_e32 v0.l, 2.0, v0.l
499490; GFX11-GISEL-REAL16-NEXT: v_add_f16_e32 v0.h, 2.0, v0.h
500491; GFX11-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
@@ -620,15 +611,12 @@ define amdgpu_kernel void @v_pack_b32.fneg(ptr addrspace(1) %in0, ptr addrspace(
620611; GFX11-GISEL-REAL16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
621612; GFX11-GISEL-REAL16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
622613; GFX11-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
623- ; GFX11-GISEL-REAL16-NEXT: v_lshlrev_b32_e32 v0 , 1, v0
614+ ; GFX11-GISEL-REAL16-NEXT: v_lshlrev_b32_e32 v1 , 1, v0
624615; GFX11-GISEL-REAL16-NEXT: s_waitcnt lgkmcnt(0)
625- ; GFX11-GISEL-REAL16-NEXT: global_load_u16 v1, v0 , s[0:1] glc dlc
616+ ; GFX11-GISEL-REAL16-NEXT: global_load_d16_b16 v0, v1 , s[0:1] glc dlc
626617; GFX11-GISEL-REAL16-NEXT: s_waitcnt vmcnt(0)
627- ; GFX11-GISEL-REAL16-NEXT: global_load_u16 v2, v0 , s[2:3] glc dlc
618+ ; GFX11-GISEL-REAL16-NEXT: global_load_d16_hi_b16 v0, v1 , s[2:3] glc dlc
628619; GFX11-GISEL-REAL16-NEXT: s_waitcnt vmcnt(0)
629- ; GFX11-GISEL-REAL16-NEXT: v_mov_b16_e32 v0.l, v1.l
630- ; GFX11-GISEL-REAL16-NEXT: v_mov_b16_e32 v0.h, v2.l
631- ; GFX11-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
632620; GFX11-GISEL-REAL16-NEXT: v_add_f16_e32 v0.l, 2.0, v0.l
633621; GFX11-GISEL-REAL16-NEXT: v_add_f16_e32 v0.h, 2.0, v0.h
634622; GFX11-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
0 commit comments