@@ -1956,6 +1956,165 @@ bb.1:
19561956 ret void
19571957}
19581958
; Test function scc_use_after_kill_inst.
; The amdgpu_ps function below evaluates a uniform condition (%y == 0) before
; calling llvm.amdgcn.kill and only branches on that condition after the call.
; The check blocks that follow the define line hold the expected assembly,
; presumably one block per RUN configuration (the RUN lines are not visible
; from this part of the file).
; NOTE(review): in every check block the s_cmp_lg_u32 that evaluates the
; uniform condition is emitted ahead of the s_andn2 / s_and_not1 instructions
; of the kill sequence, while the second s_cbranch_scc0 sits after them. If
; those instructions also write SCC, that branch no longer sees the compare
; result - confirm whether these checks intentionally capture known-bad output.
1959+ define amdgpu_ps void @scc_use_after_kill_inst (float inreg %x , i32 inreg %y ) #0 {
1960+ ; SI-LABEL: scc_use_after_kill_inst:
1961+ ; SI: ; %bb.0: ; %bb
1962+ ; SI-NEXT: v_add_f32_e64 v1, s0, 1.0
1963+ ; SI-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
1964+ ; SI-NEXT: s_mov_b64 s[2:3], exec
1965+ ; SI-NEXT: s_cmp_lg_u32 s1, 0
1966+ ; SI-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc
1967+ ; SI-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v1
1968+ ; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], vcc
1969+ ; SI-NEXT: s_cbranch_scc0 .LBB17_6
1970+ ; SI-NEXT: ; %bb.1: ; %bb
1971+ ; SI-NEXT: s_andn2_b64 exec, exec, vcc
1972+ ; SI-NEXT: s_cbranch_scc0 .LBB17_3
1973+ ; SI-NEXT: ; %bb.2: ; %bb8
1974+ ; SI-NEXT: s_mov_b32 s3, 0xf000
1975+ ; SI-NEXT: s_mov_b32 s2, -1
1976+ ; SI-NEXT: v_mov_b32_e32 v0, 8
1977+ ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
1978+ ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1979+ ; SI-NEXT: v_mov_b32_e32 v0, 4.0
1980+ ; SI-NEXT: .LBB17_3: ; %phibb
1981+ ; SI-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
1982+ ; SI-NEXT: s_cbranch_vccz .LBB17_5
1983+ ; SI-NEXT: ; %bb.4: ; %bb10
1984+ ; SI-NEXT: s_mov_b32 s3, 0xf000
1985+ ; SI-NEXT: s_mov_b32 s2, -1
1986+ ; SI-NEXT: v_mov_b32_e32 v0, 9
1987+ ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
1988+ ; SI-NEXT: s_waitcnt vmcnt(0)
1989+ ; SI-NEXT: .LBB17_5: ; %end
1990+ ; SI-NEXT: s_endpgm
1991+ ; SI-NEXT: .LBB17_6:
1992+ ; SI-NEXT: s_mov_b64 exec, 0
1993+ ; SI-NEXT: exp null off, off, off, off done vm
1994+ ; SI-NEXT: s_endpgm
1995+ ;
1996+ ; GFX10-WAVE64-LABEL: scc_use_after_kill_inst:
1997+ ; GFX10-WAVE64: ; %bb.0: ; %bb
1998+ ; GFX10-WAVE64-NEXT: v_add_f32_e64 v1, s0, 1.0
1999+ ; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec
2000+ ; GFX10-WAVE64-NEXT: s_cmp_lg_u32 s1, 0
2001+ ; GFX10-WAVE64-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
2002+ ; GFX10-WAVE64-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc
2003+ ; GFX10-WAVE64-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v1
2004+ ; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], vcc
2005+ ; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB17_6
2006+ ; GFX10-WAVE64-NEXT: ; %bb.1: ; %bb
2007+ ; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
2008+ ; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB17_3
2009+ ; GFX10-WAVE64-NEXT: ; %bb.2: ; %bb8
2010+ ; GFX10-WAVE64-NEXT: v_mov_b32_e32 v1, 8
2011+ ; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 4.0
2012+ ; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v1, off
2013+ ; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0
2014+ ; GFX10-WAVE64-NEXT: .LBB17_3: ; %phibb
2015+ ; GFX10-WAVE64-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
2016+ ; GFX10-WAVE64-NEXT: s_cbranch_vccz .LBB17_5
2017+ ; GFX10-WAVE64-NEXT: ; %bb.4: ; %bb10
2018+ ; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 9
2019+ ; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v0, off
2020+ ; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0
2021+ ; GFX10-WAVE64-NEXT: .LBB17_5: ; %end
2022+ ; GFX10-WAVE64-NEXT: s_endpgm
2023+ ; GFX10-WAVE64-NEXT: .LBB17_6:
2024+ ; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
2025+ ; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
2026+ ; GFX10-WAVE64-NEXT: s_endpgm
2027+ ;
2028+ ; GFX10-WAVE32-LABEL: scc_use_after_kill_inst:
2029+ ; GFX10-WAVE32: ; %bb.0: ; %bb
2030+ ; GFX10-WAVE32-NEXT: v_add_f32_e64 v1, s0, 1.0
2031+ ; GFX10-WAVE32-NEXT: s_mov_b32 s2, exec_lo
2032+ ; GFX10-WAVE32-NEXT: s_cmp_lg_u32 s1, 0
2033+ ; GFX10-WAVE32-NEXT: v_cmp_lt_f32_e32 vcc_lo, 0, v1
2034+ ; GFX10-WAVE32-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc_lo
2035+ ; GFX10-WAVE32-NEXT: v_cmp_nlt_f32_e32 vcc_lo, 0, v1
2036+ ; GFX10-WAVE32-NEXT: s_andn2_b32 s2, s2, vcc_lo
2037+ ; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB17_6
2038+ ; GFX10-WAVE32-NEXT: ; %bb.1: ; %bb
2039+ ; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
2040+ ; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB17_3
2041+ ; GFX10-WAVE32-NEXT: ; %bb.2: ; %bb8
2042+ ; GFX10-WAVE32-NEXT: v_mov_b32_e32 v1, 8
2043+ ; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 4.0
2044+ ; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v1, off
2045+ ; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
2046+ ; GFX10-WAVE32-NEXT: .LBB17_3: ; %phibb
2047+ ; GFX10-WAVE32-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v0
2048+ ; GFX10-WAVE32-NEXT: s_cbranch_vccz .LBB17_5
2049+ ; GFX10-WAVE32-NEXT: ; %bb.4: ; %bb10
2050+ ; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 9
2051+ ; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v0, off
2052+ ; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
2053+ ; GFX10-WAVE32-NEXT: .LBB17_5: ; %end
2054+ ; GFX10-WAVE32-NEXT: s_endpgm
2055+ ; GFX10-WAVE32-NEXT: .LBB17_6:
2056+ ; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
2057+ ; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
2058+ ; GFX10-WAVE32-NEXT: s_endpgm
2059+ ;
2060+ ; GFX11-LABEL: scc_use_after_kill_inst:
2061+ ; GFX11: ; %bb.0: ; %bb
2062+ ; GFX11-NEXT: v_add_f32_e64 v1, s0, 1.0
2063+ ; GFX11-NEXT: s_mov_b64 s[2:3], exec
2064+ ; GFX11-NEXT: s_cmp_lg_u32 s1, 0
2065+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2066+ ; GFX11-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
2067+ ; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc
2068+ ; GFX11-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v1
2069+ ; GFX11-NEXT: s_and_not1_b64 s[2:3], s[2:3], vcc
2070+ ; GFX11-NEXT: s_cbranch_scc0 .LBB17_6
2071+ ; GFX11-NEXT: ; %bb.1: ; %bb
2072+ ; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
2073+ ; GFX11-NEXT: s_cbranch_scc0 .LBB17_3
2074+ ; GFX11-NEXT: ; %bb.2: ; %bb8
2075+ ; GFX11-NEXT: v_mov_b32_e32 v1, 8
2076+ ; GFX11-NEXT: v_mov_b32_e32 v0, 4.0
2077+ ; GFX11-NEXT: global_store_b32 v[0:1], v1, off dlc
2078+ ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2079+ ; GFX11-NEXT: .LBB17_3: ; %phibb
2080+ ; GFX11-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
2081+ ; GFX11-NEXT: s_cbranch_vccz .LBB17_5
2082+ ; GFX11-NEXT: ; %bb.4: ; %bb10
2083+ ; GFX11-NEXT: v_mov_b32_e32 v0, 9
2084+ ; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc
2085+ ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2086+ ; GFX11-NEXT: .LBB17_5: ; %end
2087+ ; GFX11-NEXT: s_endpgm
2088+ ; GFX11-NEXT: .LBB17_6:
2089+ ; GFX11-NEXT: s_mov_b64 exec, 0
2090+ ; GFX11-NEXT: exp mrt0 off, off, off, off done
2091+ ; GFX11-NEXT: s_endpgm
2092+ bb:
; %tmp2 is -1.0 when 0.0 < (%x + 1.0) holds and 0.0 otherwise; the kill
; condition %cmp.tmp2 is then %tmp2 < 0.0.
2093+ %tmp = fadd float %x , 1 .000000e+00
2094+ %tmp1 = fcmp olt float 0 .000000e+00 , %tmp
2095+ %tmp2 = select i1 %tmp1 , float -1 .000000e+00 , float 0 .000000e+00
2096+ %cmp.tmp2 = fcmp olt float %tmp2 , 0 .000000e+00
; The branch condition is defined before the kill call and consumed by the
; terminator after it, so its value has to stay intact across the call.
2097+ %uniform.cond = icmp eq i32 %y , 0
2098+ call void @llvm.amdgcn.kill (i1 %cmp.tmp2 )
2099+ br i1 %uniform.cond , label %phibb , label %bb8
2100+
2101+ phibb: ; preds = %bb8, %bb
; %tmp5 is %tmp2 when control arrives from %bb and 4.0 when it arrives from
; %bb8; %bb10 is entered only when %tmp5 compares equal to 0.0.
2102+ %tmp5 = phi float [ %tmp2 , %bb ], [ 4 .000000e+00 , %bb8 ]
2103+ %tmp6 = fcmp oeq float %tmp5 , 0 .000000e+00
2104+ br i1 %tmp6 , label %bb10 , label %end
2105+
2106+ bb8: ; preds = %bb
; Volatile store of the marker value 8 through a poison addrspace(1) pointer.
2107+ store volatile i32 8 , ptr addrspace (1 ) poison, align 4
2108+ br label %phibb
2109+
2110+ bb10: ; preds = %phibb
; Volatile store of the marker value 9 through a poison addrspace(1) pointer.
2111+ store volatile i32 9 , ptr addrspace (1 ) poison, align 4
2112+ br label %end
2113+
2114+ end: ; preds = %bb10, %phibb
2115+ ret void
2116+ }
2117+
; Declarations of the AMDGPU intrinsics llvm.amdgcn.exp.f32,
; llvm.amdgcn.image.sample.l.2darray.f32.f32 and
; llvm.amdgcn.image.sample.c.1d.v4f32.f32; their users are presumably test
; functions elsewhere in this file (not visible here).
19592118declare void @llvm.amdgcn.exp.f32 (i32 immarg, i32 immarg, float , float , float , float , i1 immarg, i1 immarg) #3
19602119declare float @llvm.amdgcn.image.sample.l.2darray.f32.f32 (i32 immarg, float , float , float , float , <8 x i32 >, <4 x i32 >, i1 immarg, i32 immarg, i32 immarg) #1
19612120declare <4 x float > @llvm.amdgcn.image.sample.c.1d.v4f32.f32 (i32 , float , float , <8 x i32 >, <4 x i32 >, i1 , i32 , i32 ) #1
0 commit comments