@@ -2986,14 +2986,14 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_elt(ptr addrspace(1) %out, ptr ad
29862986; GFX6-NEXT: s_waitcnt lgkmcnt(0)
29872987; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
29882988; GFX6-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
2989- ; GFX6-NEXT: v_mov_b32_e32 v4, 0x7fc00000
29902989; GFX6-NEXT: s_mov_b64 s[2:3], s[6:7]
29912990; GFX6-NEXT: s_waitcnt vmcnt(0)
29922991; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v2
2993- ; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
29942992; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
2995- ; GFX6-NEXT: v_max_f32_e32 v2, 0x7fc00000, v2
2996- ; GFX6-NEXT: v_med3_f32 v3, v3, 0, v4
2993+ ; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
2994+ ; GFX6-NEXT: v_max_f32_e32 v3, 0, v3
2995+ ; GFX6-NEXT: v_max_f32_e32 v2, s0, v2
2996+ ; GFX6-NEXT: v_min_f32_e32 v3, s0, v3
29972997; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3
29982998; GFX6-NEXT: v_min_f32_e32 v2, 1.0, v2
29992999; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2
@@ -3006,20 +3006,20 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_elt(ptr addrspace(1) %out, ptr ad
30063006; GFX8: ; %bb.0:
30073007; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
30083008; GFX8-NEXT: v_lshlrev_b32_e32 v2, 2, v0
3009- ; GFX8-NEXT: v_mov_b32_e32 v4, 0x7e00
30103009; GFX8-NEXT: s_waitcnt lgkmcnt(0)
30113010; GFX8-NEXT: v_mov_b32_e32 v1, s3
30123011; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v2
30133012; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
30143013; GFX8-NEXT: flat_load_dword v3, v[0:1]
30153014; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2
3015+ ; GFX8-NEXT: v_mov_b32_e32 v4, s0
30163016; GFX8-NEXT: v_mov_b32_e32 v1, s1
30173017; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
30183018; GFX8-NEXT: s_waitcnt vmcnt(0)
30193019; GFX8-NEXT: v_max_f16_sdwa v2, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
30203020; GFX8-NEXT: v_max_f16_e32 v3, v3, v3
30213021; GFX8-NEXT: v_max_f16_e32 v2, 0, v2
3022- ; GFX8-NEXT: v_max_f16_e32 v3, 0x7e00 , v3
3022+ ; GFX8-NEXT: v_max_f16_e32 v3, s0 , v3
30233023; GFX8-NEXT: v_min_f16_e32 v3, 1.0, v3
30243024; GFX8-NEXT: v_min_f16_sdwa v2, v2, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
30253025; GFX8-NEXT: v_or_b32_e32 v2, v3, v2
@@ -3747,16 +3747,16 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_limit_elts0(ptr addrspace(1) %out
37473747; GFX6-NEXT: s_waitcnt lgkmcnt(0)
37483748; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
37493749; GFX6-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
3750- ; GFX6-NEXT: v_mov_b32_e32 v4, 0x7fc00000
37513750; GFX6-NEXT: s_mov_b64 s[2:3], s[6:7]
37523751; GFX6-NEXT: s_waitcnt vmcnt(0)
37533752; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v2
37543753; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
37553754; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
3756- ; GFX6-NEXT: v_max_f32_e32 v3, 0x7fc00000, v3
3755+ ; GFX6-NEXT: v_max_f32_e32 v3, s0, v3
3756+ ; GFX6-NEXT: v_max_f32_e32 v2, 0, v2
37573757; GFX6-NEXT: v_min_f32_e32 v3, 1.0, v3
37583758; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3
3759- ; GFX6-NEXT: v_med3_f32 v2, v2, 0, v4
3759+ ; GFX6-NEXT: v_min_f32_e32 v2, s0, v2
37603760; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2
37613761; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
37623762; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
@@ -3779,9 +3779,9 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_limit_elts0(ptr addrspace(1) %out
37793779; GFX8-NEXT: s_waitcnt vmcnt(0)
37803780; GFX8-NEXT: v_max_f16_sdwa v2, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
37813781; GFX8-NEXT: v_max_f16_e32 v3, v3, v3
3782- ; GFX8-NEXT: v_max_f16_e32 v2, 0x7e00 , v2
3782+ ; GFX8-NEXT: v_max_f16_e32 v2, s0 , v2
37833783; GFX8-NEXT: v_max_f16_e32 v3, 0, v3
3784- ; GFX8-NEXT: v_min_f16_e32 v3, 0x7e00 , v3
3784+ ; GFX8-NEXT: v_min_f16_e32 v3, s0 , v3
37853785; GFX8-NEXT: v_min_f16_sdwa v2, v2, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
37863786; GFX8-NEXT: v_or_b32_e32 v2, v3, v2
37873787; GFX8-NEXT: flat_store_dword v[0:1], v2
@@ -3845,14 +3845,14 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_limit_elts1(ptr addrspace(1) %out
38453845; GFX6-NEXT: s_waitcnt lgkmcnt(0)
38463846; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
38473847; GFX6-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
3848- ; GFX6-NEXT: v_mov_b32_e32 v4, 0x7fc00000
38493848; GFX6-NEXT: s_mov_b64 s[2:3], s[6:7]
38503849; GFX6-NEXT: s_waitcnt vmcnt(0)
38513850; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v2
3852- ; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
38533851; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
3854- ; GFX6-NEXT: v_max_f32_e32 v2, 0x7fc00000, v2
3855- ; GFX6-NEXT: v_med3_f32 v3, v3, 0, v4
3852+ ; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
3853+ ; GFX6-NEXT: v_max_f32_e32 v3, 0, v3
3854+ ; GFX6-NEXT: v_max_f32_e32 v2, s0, v2
3855+ ; GFX6-NEXT: v_min_f32_e32 v3, s0, v3
38563856; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3
38573857; GFX6-NEXT: v_min_f32_e32 v2, 1.0, v2
38583858; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2
@@ -3865,20 +3865,20 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_limit_elts1(ptr addrspace(1) %out
38653865; GFX8: ; %bb.0:
38663866; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
38673867; GFX8-NEXT: v_lshlrev_b32_e32 v2, 2, v0
3868- ; GFX8-NEXT: v_mov_b32_e32 v4, 0x7e00
38693868; GFX8-NEXT: s_waitcnt lgkmcnt(0)
38703869; GFX8-NEXT: v_mov_b32_e32 v1, s3
38713870; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v2
38723871; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
38733872; GFX8-NEXT: flat_load_dword v3, v[0:1]
38743873; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2
3874+ ; GFX8-NEXT: v_mov_b32_e32 v4, s0
38753875; GFX8-NEXT: v_mov_b32_e32 v1, s1
38763876; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
38773877; GFX8-NEXT: s_waitcnt vmcnt(0)
38783878; GFX8-NEXT: v_max_f16_sdwa v2, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
38793879; GFX8-NEXT: v_max_f16_e32 v3, v3, v3
38803880; GFX8-NEXT: v_max_f16_e32 v2, 0, v2
3881- ; GFX8-NEXT: v_max_f16_e32 v3, 0x7e00 , v3
3881+ ; GFX8-NEXT: v_max_f16_e32 v3, s0 , v3
38823882; GFX8-NEXT: v_min_f16_e32 v3, 1.0, v3
38833883; GFX8-NEXT: v_min_f16_sdwa v2, v2, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
38843884; GFX8-NEXT: v_or_b32_e32 v2, v3, v2
0 commit comments