Skip to content

Commit 8373adb

Browse files
committed
[AMDGPU] Allow lane-op lowering for illegal types
Currently, the overloaded lane-op intrinsics only work for legal types. For illegal types, notably i8, SelectionDAG (SDag) lowering fails with 'Do not know how to promote this operator'. This patch fixes that.
1 parent 0751418 commit 8373adb

File tree

9 files changed

+557
-0
lines changed

9 files changed

+557
-0
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6552,6 +6552,17 @@ void SITargetLowering::ReplaceNodeResults(SDNode *N,
65526552
Results.push_back(LoadVal);
65536553
return;
65546554
}
6555+
case Intrinsic::amdgcn_readlane:
6556+
case Intrinsic::amdgcn_readfirstlane:
6557+
case Intrinsic::amdgcn_writelane:
6558+
case Intrinsic::amdgcn_permlane16:
6559+
case Intrinsic::amdgcn_permlanex16:
6560+
case Intrinsic::amdgcn_permlane64:
6561+
case Intrinsic::amdgcn_set_inactive:
6562+
case Intrinsic::amdgcn_set_inactive_chain_arg:
6563+
case Intrinsic::amdgcn_mov_dpp8:
6564+
Results.push_back(lowerLaneOp(*this, N, DAG));
6565+
return;
65556566
}
65566567
break;
65576568
}

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp8.ll

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,25 @@ define amdgpu_ps void @dpp8_double(double %in, ptr addrspace(1) %out) {
186186
ret void
187187
}
188188

189+
; GFX10PLUS-LABEL: {{^}}dpp8_i8:
190+
; GFX10PLUS: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
191+
; GFX10PLUS: global_store_{{byte|b8}} v[1:2], v0, off
192+
define amdgpu_ps void @dpp8_i8(i8 %in, ptr addrspace(1) %out) {
193+
%tmp0 = call i8 @llvm.amdgcn.mov.dpp8.i8(i8 %in, i32 1)
194+
store i8 %tmp0, ptr addrspace(1) %out
195+
ret void
196+
}
197+
198+
; GFX10PLUS-LABEL: {{^}}dpp8_i1:
199+
; GFX10PLUS: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
200+
; GFX10PLUS: v_and_b32_e32 v0, 1, v0
201+
; GFX10PLUS: global_store_{{byte|b8}} v[1:2], v0, off
202+
define amdgpu_ps void @dpp8_i1(i1 %in, ptr addrspace(1) %out) {
203+
%tmp0 = call i1 @llvm.amdgcn.mov.dpp8.i1(i1 %in, i32 1)
204+
store i1 %tmp0, ptr addrspace(1) %out
205+
ret void
206+
}
207+
189208
declare i32 @llvm.amdgcn.mov.dpp8.i32(i32, i32) #0
190209

191210
attributes #0 = { nounwind readnone convergent }

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll

Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8932,6 +8932,85 @@ define void @v_permlane16_v2f32(ptr addrspace(1) %out, <2 x float> %src0, i32 %s
89328932
ret void
89338933
}
89348934

8935+
define void @v_permlane16_i8(ptr addrspace(1) %out, i8 %src0, i32 %src1, i32 %src2) {
8936+
; GFX10-LABEL: v_permlane16_i8:
8937+
; GFX10: ; %bb.0:
8938+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8939+
; GFX10-NEXT: v_readfirstlane_b32 s4, v3
8940+
; GFX10-NEXT: v_readfirstlane_b32 s5, v4
8941+
; GFX10-NEXT: v_permlane16_b32 v2, v2, s4, s5
8942+
; GFX10-NEXT: global_store_byte v[0:1], v2, off
8943+
; GFX10-NEXT: s_setpc_b64 s[30:31]
8944+
;
8945+
; GFX11-LABEL: v_permlane16_i8:
8946+
; GFX11: ; %bb.0:
8947+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8948+
; GFX11-NEXT: v_readfirstlane_b32 s0, v3
8949+
; GFX11-NEXT: v_readfirstlane_b32 s1, v4
8950+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
8951+
; GFX11-NEXT: v_permlane16_b32 v2, v2, s0, s1
8952+
; GFX11-NEXT: global_store_b8 v[0:1], v2, off
8953+
; GFX11-NEXT: s_setpc_b64 s[30:31]
8954+
;
8955+
; GFX12-LABEL: v_permlane16_i8:
8956+
; GFX12: ; %bb.0:
8957+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
8958+
; GFX12-NEXT: s_wait_expcnt 0x0
8959+
; GFX12-NEXT: s_wait_samplecnt 0x0
8960+
; GFX12-NEXT: s_wait_bvhcnt 0x0
8961+
; GFX12-NEXT: s_wait_kmcnt 0x0
8962+
; GFX12-NEXT: v_readfirstlane_b32 s0, v3
8963+
; GFX12-NEXT: v_readfirstlane_b32 s1, v4
8964+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
8965+
; GFX12-NEXT: v_permlane16_b32 v2, v2, s0, s1
8966+
; GFX12-NEXT: global_store_b8 v[0:1], v2, off
8967+
; GFX12-NEXT: s_setpc_b64 s[30:31]
8968+
%v = call i8 @llvm.amdgcn.permlane16.i8(i8 %src0, i8 %src0, i32 %src1, i32 %src2, i1 false, i1 false)
8969+
store i8 %v, ptr addrspace(1) %out
8970+
ret void
8971+
}
8972+
8973+
define void @v_permlane16_i1(ptr addrspace(1) %out, i1 %src0, i32 %src1, i32 %src2) {
8974+
; GFX10-LABEL: v_permlane16_i1:
8975+
; GFX10: ; %bb.0:
8976+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8977+
; GFX10-NEXT: v_readfirstlane_b32 s4, v3
8978+
; GFX10-NEXT: v_readfirstlane_b32 s5, v4
8979+
; GFX10-NEXT: v_permlane16_b32 v2, v2, s4, s5
8980+
; GFX10-NEXT: v_and_b32_e32 v2, 1, v2
8981+
; GFX10-NEXT: global_store_byte v[0:1], v2, off
8982+
; GFX10-NEXT: s_setpc_b64 s[30:31]
8983+
;
8984+
; GFX11-LABEL: v_permlane16_i1:
8985+
; GFX11: ; %bb.0:
8986+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8987+
; GFX11-NEXT: v_readfirstlane_b32 s0, v3
8988+
; GFX11-NEXT: v_readfirstlane_b32 s1, v4
8989+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
8990+
; GFX11-NEXT: v_permlane16_b32 v2, v2, s0, s1
8991+
; GFX11-NEXT: v_and_b32_e32 v2, 1, v2
8992+
; GFX11-NEXT: global_store_b8 v[0:1], v2, off
8993+
; GFX11-NEXT: s_setpc_b64 s[30:31]
8994+
;
8995+
; GFX12-LABEL: v_permlane16_i1:
8996+
; GFX12: ; %bb.0:
8997+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
8998+
; GFX12-NEXT: s_wait_expcnt 0x0
8999+
; GFX12-NEXT: s_wait_samplecnt 0x0
9000+
; GFX12-NEXT: s_wait_bvhcnt 0x0
9001+
; GFX12-NEXT: s_wait_kmcnt 0x0
9002+
; GFX12-NEXT: v_readfirstlane_b32 s0, v3
9003+
; GFX12-NEXT: v_readfirstlane_b32 s1, v4
9004+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
9005+
; GFX12-NEXT: v_permlane16_b32 v2, v2, s0, s1
9006+
; GFX12-NEXT: v_and_b32_e32 v2, 1, v2
9007+
; GFX12-NEXT: global_store_b8 v[0:1], v2, off
9008+
; GFX12-NEXT: s_setpc_b64 s[30:31]
9009+
%v = call i1 @llvm.amdgcn.permlane16.i1(i1 %src0, i1 %src0, i32 %src1, i32 %src2, i1 false, i1 false)
9010+
store i1 %v, ptr addrspace(1) %out
9011+
ret void
9012+
}
9013+
89359014
define void @v_permlanex16_v2f32(ptr addrspace(1) %out, <2 x float> %src0, i32 %src1, i32 %src2) {
89369015
; GFX10-SDAG-LABEL: v_permlanex16_v2f32:
89379016
; GFX10-SDAG: ; %bb.0:
@@ -9430,3 +9509,82 @@ define void @v_permlanex16_v8i16(ptr addrspace(1) %out, <8 x i16> %src0, i32 %sr
94309509
store <8 x i16> %v, ptr addrspace(1) %out
94319510
ret void
94329511
}
9512+
9513+
define void @v_permlanex16_i8(ptr addrspace(1) %out, i8 %src0, i32 %src1, i32 %src2) {
9514+
; GFX10-LABEL: v_permlanex16_i8:
9515+
; GFX10: ; %bb.0:
9516+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9517+
; GFX10-NEXT: v_readfirstlane_b32 s4, v3
9518+
; GFX10-NEXT: v_readfirstlane_b32 s5, v4
9519+
; GFX10-NEXT: v_permlanex16_b32 v2, v2, s4, s5
9520+
; GFX10-NEXT: global_store_byte v[0:1], v2, off
9521+
; GFX10-NEXT: s_setpc_b64 s[30:31]
9522+
;
9523+
; GFX11-LABEL: v_permlanex16_i8:
9524+
; GFX11: ; %bb.0:
9525+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9526+
; GFX11-NEXT: v_readfirstlane_b32 s0, v3
9527+
; GFX11-NEXT: v_readfirstlane_b32 s1, v4
9528+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
9529+
; GFX11-NEXT: v_permlanex16_b32 v2, v2, s0, s1
9530+
; GFX11-NEXT: global_store_b8 v[0:1], v2, off
9531+
; GFX11-NEXT: s_setpc_b64 s[30:31]
9532+
;
9533+
; GFX12-LABEL: v_permlanex16_i8:
9534+
; GFX12: ; %bb.0:
9535+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
9536+
; GFX12-NEXT: s_wait_expcnt 0x0
9537+
; GFX12-NEXT: s_wait_samplecnt 0x0
9538+
; GFX12-NEXT: s_wait_bvhcnt 0x0
9539+
; GFX12-NEXT: s_wait_kmcnt 0x0
9540+
; GFX12-NEXT: v_readfirstlane_b32 s0, v3
9541+
; GFX12-NEXT: v_readfirstlane_b32 s1, v4
9542+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
9543+
; GFX12-NEXT: v_permlanex16_b32 v2, v2, s0, s1
9544+
; GFX12-NEXT: global_store_b8 v[0:1], v2, off
9545+
; GFX12-NEXT: s_setpc_b64 s[30:31]
9546+
%v = call i8 @llvm.amdgcn.permlanex16.i8(i8 %src0, i8 %src0, i32 %src1, i32 %src2, i1 false, i1 false)
9547+
store i8 %v, ptr addrspace(1) %out
9548+
ret void
9549+
}
9550+
9551+
define void @v_permlanex16_i1(ptr addrspace(1) %out, i1 %src0, i32 %src1, i32 %src2) {
9552+
; GFX10-LABEL: v_permlanex16_i1:
9553+
; GFX10: ; %bb.0:
9554+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9555+
; GFX10-NEXT: v_readfirstlane_b32 s4, v3
9556+
; GFX10-NEXT: v_readfirstlane_b32 s5, v4
9557+
; GFX10-NEXT: v_permlanex16_b32 v2, v2, s4, s5
9558+
; GFX10-NEXT: v_and_b32_e32 v2, 1, v2
9559+
; GFX10-NEXT: global_store_byte v[0:1], v2, off
9560+
; GFX10-NEXT: s_setpc_b64 s[30:31]
9561+
;
9562+
; GFX11-LABEL: v_permlanex16_i1:
9563+
; GFX11: ; %bb.0:
9564+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9565+
; GFX11-NEXT: v_readfirstlane_b32 s0, v3
9566+
; GFX11-NEXT: v_readfirstlane_b32 s1, v4
9567+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
9568+
; GFX11-NEXT: v_permlanex16_b32 v2, v2, s0, s1
9569+
; GFX11-NEXT: v_and_b32_e32 v2, 1, v2
9570+
; GFX11-NEXT: global_store_b8 v[0:1], v2, off
9571+
; GFX11-NEXT: s_setpc_b64 s[30:31]
9572+
;
9573+
; GFX12-LABEL: v_permlanex16_i1:
9574+
; GFX12: ; %bb.0:
9575+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
9576+
; GFX12-NEXT: s_wait_expcnt 0x0
9577+
; GFX12-NEXT: s_wait_samplecnt 0x0
9578+
; GFX12-NEXT: s_wait_bvhcnt 0x0
9579+
; GFX12-NEXT: s_wait_kmcnt 0x0
9580+
; GFX12-NEXT: v_readfirstlane_b32 s0, v3
9581+
; GFX12-NEXT: v_readfirstlane_b32 s1, v4
9582+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
9583+
; GFX12-NEXT: v_permlanex16_b32 v2, v2, s0, s1
9584+
; GFX12-NEXT: v_and_b32_e32 v2, 1, v2
9585+
; GFX12-NEXT: global_store_b8 v[0:1], v2, off
9586+
; GFX12-NEXT: s_setpc_b64 s[30:31]
9587+
%v = call i1 @llvm.amdgcn.permlanex16.i1(i1 %src0, i1 %src0, i32 %src1, i32 %src2, i1 false, i1 false)
9588+
store i1 %v, ptr addrspace(1) %out
9589+
ret void
9590+
}

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane64.ll

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,33 @@ define amdgpu_kernel void @test_v(ptr addrspace(1) %out, i32 %src0) #1 {
5252
store i32 %v, ptr addrspace(1) %out
5353
ret void
5454
}
55+
56+
define void @test_i8(ptr addrspace(1) %out, i8 %src0) #1 {
57+
; GFX11-LABEL: test_i8:
58+
; GFX11: ; %bb.0:
59+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
60+
; GFX11-NEXT: v_permlane64_b32 v2, v2
61+
; GFX11-NEXT: global_store_b8 v[0:1], v2, off
62+
; GFX11-NEXT: s_setpc_b64 s[30:31]
63+
%v = call i8 @llvm.amdgcn.permlane64.i8(i8 %src0)
64+
store i8 %v, ptr addrspace(1) %out
65+
ret void
66+
}
67+
68+
define void @test_i1(ptr addrspace(1) %out, i1 %src0) #1 {
69+
; GFX11-LABEL: test_i1:
70+
; GFX11: ; %bb.0:
71+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
72+
; GFX11-NEXT: v_permlane64_b32 v2, v2
73+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
74+
; GFX11-NEXT: v_and_b32_e32 v2, 1, v2
75+
; GFX11-NEXT: global_store_b8 v[0:1], v2, off
76+
; GFX11-NEXT: s_setpc_b64 s[30:31]
77+
%v = call i1 @llvm.amdgcn.permlane64.i1(i1 %src0)
78+
store i1 %v, ptr addrspace(1) %out
79+
ret void
80+
}
81+
5582
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
5683
; GFX11-GISEL: {{.*}}
5784
; GFX11-SDAG: {{.*}}

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1729,3 +1729,50 @@ define void @test_readfirstlane_v32f16(ptr addrspace(1) %out, <32 x half> %src)
17291729
ret void
17301730
}
17311731

1732+
define void @dpp8_i8(i8 %in, ptr addrspace(1) %out) {
1733+
; CHECK-SDAG-LABEL: dpp8_i8:
1734+
; CHECK-SDAG: ; %bb.0:
1735+
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1736+
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v0
1737+
; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s4
1738+
; CHECK-SDAG-NEXT: flat_store_byte v[1:2], v0
1739+
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0)
1740+
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
1741+
;
1742+
; CHECK-GISEL-LABEL: dpp8_i8:
1743+
; CHECK-GISEL: ; %bb.0:
1744+
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1745+
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v0
1746+
; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s4
1747+
; CHECK-GISEL-NEXT: flat_store_byte v[1:2], v0
1748+
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0)
1749+
; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
1750+
%tmp0 = call i8 @llvm.amdgcn.readfirstlane.i8(i8 %in)
1751+
store i8 %tmp0, ptr addrspace(1) %out
1752+
ret void
1753+
}
1754+
1755+
define void @dpp8_i1(i1 %in, ptr addrspace(1) %out) {
1756+
; CHECK-SDAG-LABEL: dpp8_i1:
1757+
; CHECK-SDAG: ; %bb.0:
1758+
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1759+
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v0
1760+
; CHECK-SDAG-NEXT: s_and_b32 s4, s4, 1
1761+
; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s4
1762+
; CHECK-SDAG-NEXT: flat_store_byte v[1:2], v0
1763+
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0)
1764+
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
1765+
;
1766+
; CHECK-GISEL-LABEL: dpp8_i1:
1767+
; CHECK-GISEL: ; %bb.0:
1768+
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1769+
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v0
1770+
; CHECK-GISEL-NEXT: s_and_b32 s4, s4, 1
1771+
; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s4
1772+
; CHECK-GISEL-NEXT: flat_store_byte v[1:2], v0
1773+
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0)
1774+
; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
1775+
%tmp0 = call i1 @llvm.amdgcn.readfirstlane.i1(i1 %in)
1776+
store i1 %tmp0, ptr addrspace(1) %out
1777+
ret void
1778+
}

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -894,6 +894,54 @@ define void @test_readlane_v8i16(ptr addrspace(1) %out, <8 x i16> %src, i32 %src
894894
ret void
895895
}
896896

897+
define void @dpp8_i8(i8 %in, ptr addrspace(1) %out) {
898+
; CHECK-SDAG-LABEL: dpp8_i8:
899+
; CHECK-SDAG: ; %bb.0:
900+
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
901+
; CHECK-SDAG-NEXT: v_readlane_b32 s4, v0, 1
902+
; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s4
903+
; CHECK-SDAG-NEXT: flat_store_byte v[1:2], v0
904+
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0)
905+
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
906+
;
907+
; CHECK-GISEL-LABEL: dpp8_i8:
908+
; CHECK-GISEL: ; %bb.0:
909+
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
910+
; CHECK-GISEL-NEXT: v_readlane_b32 s4, v0, 1
911+
; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s4
912+
; CHECK-GISEL-NEXT: flat_store_byte v[1:2], v0
913+
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0)
914+
; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
915+
%tmp0 = call i8 @llvm.amdgcn.readlane.i8(i8 %in, i32 1)
916+
store i8 %tmp0, ptr addrspace(1) %out
917+
ret void
918+
}
919+
920+
define void @dpp8_i1(i1 %in, ptr addrspace(1) %out) {
921+
; CHECK-SDAG-LABEL: dpp8_i1:
922+
; CHECK-SDAG: ; %bb.0:
923+
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
924+
; CHECK-SDAG-NEXT: v_readlane_b32 s4, v0, 1
925+
; CHECK-SDAG-NEXT: s_and_b32 s4, s4, 1
926+
; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s4
927+
; CHECK-SDAG-NEXT: flat_store_byte v[1:2], v0
928+
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0)
929+
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
930+
;
931+
; CHECK-GISEL-LABEL: dpp8_i1:
932+
; CHECK-GISEL: ; %bb.0:
933+
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
934+
; CHECK-GISEL-NEXT: v_readlane_b32 s4, v0, 1
935+
; CHECK-GISEL-NEXT: s_and_b32 s4, s4, 1
936+
; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s4
937+
; CHECK-GISEL-NEXT: flat_store_byte v[1:2], v0
938+
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0)
939+
; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
940+
%tmp0 = call i1 @llvm.amdgcn.readlane.i1(i1 %in, i32 1)
941+
store i1 %tmp0, ptr addrspace(1) %out
942+
ret void
943+
}
944+
897945
declare i32 @llvm.amdgcn.workitem.id.x() #2
898946

899947
attributes #0 = { nounwind readnone convergent }

0 commit comments

Comments
 (0)