Skip to content

Commit 331bf93

Browse files
committed
[AMDGPU] Allow lane-op lowering for illegal types
Currently, the overloaded lane-op intrinsics only work for legal types. With SelectionDAG, illegal types (notably i8) fail with 'Do not know how to promote this operator'. This patch fixes that by handling the lane-op intrinsics in SITargetLowering::ReplaceNodeResults.
1 parent 6d7e51d commit 331bf93

File tree

9 files changed

+558
-0
lines changed

9 files changed

+558
-0
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6454,6 +6454,17 @@ void SITargetLowering::ReplaceNodeResults(SDNode *N,
64546454
Results.push_back(LoadVal);
64556455
return;
64566456
}
6457+
case Intrinsic::amdgcn_readlane:
6458+
case Intrinsic::amdgcn_readfirstlane:
6459+
case Intrinsic::amdgcn_writelane:
6460+
case Intrinsic::amdgcn_permlane16:
6461+
case Intrinsic::amdgcn_permlanex16:
6462+
case Intrinsic::amdgcn_permlane64:
6463+
case Intrinsic::amdgcn_set_inactive:
6464+
case Intrinsic::amdgcn_set_inactive_chain_arg:
6465+
case Intrinsic::amdgcn_mov_dpp8:
6466+
Results.push_back(lowerLaneOp(*this, N, DAG));
6467+
return;
64576468
}
64586469
break;
64596470
}

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp8.ll

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,25 @@ define amdgpu_ps void @dpp8_double(double %in, ptr addrspace(1) %out) {
184184
ret void
185185
}
186186

187+
; GFX10PLUS-LABEL: {{^}}dpp8_i8:
188+
; GFX10PLUS: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
189+
; GFX10PLUS: global_store_{{byte|b8}} v[1:2], v0, off
190+
define amdgpu_ps void @dpp8_i8(i8 %in, ptr addrspace(1) %out) {
191+
%tmp0 = call i8 @llvm.amdgcn.mov.dpp8.i8(i8 %in, i32 1)
192+
store i8 %tmp0, ptr addrspace(1) %out
193+
ret void
194+
}
195+
196+
; GFX10PLUS-LABEL: {{^}}dpp8_i1:
197+
; GFX10PLUS: v_mov_b32_dpp v0, v0 dpp8:[1,0,0,0,0,0,0,0]
198+
; GFX10PLUS: v_and_b32_e32 v0, 1, v0
199+
; GFX10PLUS: global_store_{{byte|b8}} v[1:2], v0, off
200+
define amdgpu_ps void @dpp8_i1(i1 %in, ptr addrspace(1) %out) {
201+
%tmp0 = call i1 @llvm.amdgcn.mov.dpp8.i1(i1 %in, i32 1)
202+
store i1 %tmp0, ptr addrspace(1) %out
203+
ret void
204+
}
205+
187206
declare i32 @llvm.amdgcn.mov.dpp8.i32(i32, i32) #0
188207

189208
attributes #0 = { nounwind readnone convergent }

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll

Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8770,6 +8770,85 @@ define void @v_permlane16_v2f32(ptr addrspace(1) %out, <2 x float> %src0, i32 %s
87708770
ret void
87718771
}
87728772

8773+
define void @v_permlane16_i8(ptr addrspace(1) %out, i8 %src0, i32 %src1, i32 %src2) {
8774+
; GFX10-LABEL: v_permlane16_i8:
8775+
; GFX10: ; %bb.0:
8776+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8777+
; GFX10-NEXT: v_readfirstlane_b32 s4, v3
8778+
; GFX10-NEXT: v_readfirstlane_b32 s5, v4
8779+
; GFX10-NEXT: v_permlane16_b32 v2, v2, s4, s5
8780+
; GFX10-NEXT: global_store_byte v[0:1], v2, off
8781+
; GFX10-NEXT: s_setpc_b64 s[30:31]
8782+
;
8783+
; GFX11-LABEL: v_permlane16_i8:
8784+
; GFX11: ; %bb.0:
8785+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8786+
; GFX11-NEXT: v_readfirstlane_b32 s0, v3
8787+
; GFX11-NEXT: v_readfirstlane_b32 s1, v4
8788+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
8789+
; GFX11-NEXT: v_permlane16_b32 v2, v2, s0, s1
8790+
; GFX11-NEXT: global_store_b8 v[0:1], v2, off
8791+
; GFX11-NEXT: s_setpc_b64 s[30:31]
8792+
;
8793+
; GFX12-LABEL: v_permlane16_i8:
8794+
; GFX12: ; %bb.0:
8795+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
8796+
; GFX12-NEXT: s_wait_expcnt 0x0
8797+
; GFX12-NEXT: s_wait_samplecnt 0x0
8798+
; GFX12-NEXT: s_wait_bvhcnt 0x0
8799+
; GFX12-NEXT: s_wait_kmcnt 0x0
8800+
; GFX12-NEXT: v_readfirstlane_b32 s0, v3
8801+
; GFX12-NEXT: v_readfirstlane_b32 s1, v4
8802+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
8803+
; GFX12-NEXT: v_permlane16_b32 v2, v2, s0, s1
8804+
; GFX12-NEXT: global_store_b8 v[0:1], v2, off
8805+
; GFX12-NEXT: s_setpc_b64 s[30:31]
8806+
%v = call i8 @llvm.amdgcn.permlane16.i8(i8 %src0, i8 %src0, i32 %src1, i32 %src2, i1 false, i1 false)
8807+
store i8 %v, ptr addrspace(1) %out
8808+
ret void
8809+
}
8810+
8811+
define void @v_permlane16_i1(ptr addrspace(1) %out, i1 %src0, i32 %src1, i32 %src2) {
8812+
; GFX10-LABEL: v_permlane16_i1:
8813+
; GFX10: ; %bb.0:
8814+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8815+
; GFX10-NEXT: v_readfirstlane_b32 s4, v3
8816+
; GFX10-NEXT: v_readfirstlane_b32 s5, v4
8817+
; GFX10-NEXT: v_permlane16_b32 v2, v2, s4, s5
8818+
; GFX10-NEXT: v_and_b32_e32 v2, 1, v2
8819+
; GFX10-NEXT: global_store_byte v[0:1], v2, off
8820+
; GFX10-NEXT: s_setpc_b64 s[30:31]
8821+
;
8822+
; GFX11-LABEL: v_permlane16_i1:
8823+
; GFX11: ; %bb.0:
8824+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8825+
; GFX11-NEXT: v_readfirstlane_b32 s0, v3
8826+
; GFX11-NEXT: v_readfirstlane_b32 s1, v4
8827+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
8828+
; GFX11-NEXT: v_permlane16_b32 v2, v2, s0, s1
8829+
; GFX11-NEXT: v_and_b32_e32 v2, 1, v2
8830+
; GFX11-NEXT: global_store_b8 v[0:1], v2, off
8831+
; GFX11-NEXT: s_setpc_b64 s[30:31]
8832+
;
8833+
; GFX12-LABEL: v_permlane16_i1:
8834+
; GFX12: ; %bb.0:
8835+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
8836+
; GFX12-NEXT: s_wait_expcnt 0x0
8837+
; GFX12-NEXT: s_wait_samplecnt 0x0
8838+
; GFX12-NEXT: s_wait_bvhcnt 0x0
8839+
; GFX12-NEXT: s_wait_kmcnt 0x0
8840+
; GFX12-NEXT: v_readfirstlane_b32 s0, v3
8841+
; GFX12-NEXT: v_readfirstlane_b32 s1, v4
8842+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
8843+
; GFX12-NEXT: v_permlane16_b32 v2, v2, s0, s1
8844+
; GFX12-NEXT: v_and_b32_e32 v2, 1, v2
8845+
; GFX12-NEXT: global_store_b8 v[0:1], v2, off
8846+
; GFX12-NEXT: s_setpc_b64 s[30:31]
8847+
%v = call i1 @llvm.amdgcn.permlane16.i1(i1 %src0, i1 %src0, i32 %src1, i32 %src2, i1 false, i1 false)
8848+
store i1 %v, ptr addrspace(1) %out
8849+
ret void
8850+
}
8851+
87738852
define void @v_permlanex16_v2f32(ptr addrspace(1) %out, <2 x float> %src0, i32 %src1, i32 %src2) {
87748853
; GFX10-SDAG-LABEL: v_permlanex16_v2f32:
87758854
; GFX10-SDAG: ; %bb.0:
@@ -9258,3 +9337,82 @@ define void @v_permlanex16_v8i16(ptr addrspace(1) %out, <8 x i16> %src0, i32 %sr
92589337
store <8 x i16> %v, ptr addrspace(1) %out
92599338
ret void
92609339
}
9340+
9341+
define void @v_permlanex16_i8(ptr addrspace(1) %out, i8 %src0, i32 %src1, i32 %src2) {
9342+
; GFX10-LABEL: v_permlanex16_i8:
9343+
; GFX10: ; %bb.0:
9344+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9345+
; GFX10-NEXT: v_readfirstlane_b32 s4, v3
9346+
; GFX10-NEXT: v_readfirstlane_b32 s5, v4
9347+
; GFX10-NEXT: v_permlanex16_b32 v2, v2, s4, s5
9348+
; GFX10-NEXT: global_store_byte v[0:1], v2, off
9349+
; GFX10-NEXT: s_setpc_b64 s[30:31]
9350+
;
9351+
; GFX11-LABEL: v_permlanex16_i8:
9352+
; GFX11: ; %bb.0:
9353+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9354+
; GFX11-NEXT: v_readfirstlane_b32 s0, v3
9355+
; GFX11-NEXT: v_readfirstlane_b32 s1, v4
9356+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
9357+
; GFX11-NEXT: v_permlanex16_b32 v2, v2, s0, s1
9358+
; GFX11-NEXT: global_store_b8 v[0:1], v2, off
9359+
; GFX11-NEXT: s_setpc_b64 s[30:31]
9360+
;
9361+
; GFX12-LABEL: v_permlanex16_i8:
9362+
; GFX12: ; %bb.0:
9363+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
9364+
; GFX12-NEXT: s_wait_expcnt 0x0
9365+
; GFX12-NEXT: s_wait_samplecnt 0x0
9366+
; GFX12-NEXT: s_wait_bvhcnt 0x0
9367+
; GFX12-NEXT: s_wait_kmcnt 0x0
9368+
; GFX12-NEXT: v_readfirstlane_b32 s0, v3
9369+
; GFX12-NEXT: v_readfirstlane_b32 s1, v4
9370+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
9371+
; GFX12-NEXT: v_permlanex16_b32 v2, v2, s0, s1
9372+
; GFX12-NEXT: global_store_b8 v[0:1], v2, off
9373+
; GFX12-NEXT: s_setpc_b64 s[30:31]
9374+
%v = call i8 @llvm.amdgcn.permlanex16.i8(i8 %src0, i8 %src0, i32 %src1, i32 %src2, i1 false, i1 false)
9375+
store i8 %v, ptr addrspace(1) %out
9376+
ret void
9377+
}
9378+
9379+
define void @v_permlanex16_i1(ptr addrspace(1) %out, i1 %src0, i32 %src1, i32 %src2) {
9380+
; GFX10-LABEL: v_permlanex16_i1:
9381+
; GFX10: ; %bb.0:
9382+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9383+
; GFX10-NEXT: v_readfirstlane_b32 s4, v3
9384+
; GFX10-NEXT: v_readfirstlane_b32 s5, v4
9385+
; GFX10-NEXT: v_permlanex16_b32 v2, v2, s4, s5
9386+
; GFX10-NEXT: v_and_b32_e32 v2, 1, v2
9387+
; GFX10-NEXT: global_store_byte v[0:1], v2, off
9388+
; GFX10-NEXT: s_setpc_b64 s[30:31]
9389+
;
9390+
; GFX11-LABEL: v_permlanex16_i1:
9391+
; GFX11: ; %bb.0:
9392+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9393+
; GFX11-NEXT: v_readfirstlane_b32 s0, v3
9394+
; GFX11-NEXT: v_readfirstlane_b32 s1, v4
9395+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
9396+
; GFX11-NEXT: v_permlanex16_b32 v2, v2, s0, s1
9397+
; GFX11-NEXT: v_and_b32_e32 v2, 1, v2
9398+
; GFX11-NEXT: global_store_b8 v[0:1], v2, off
9399+
; GFX11-NEXT: s_setpc_b64 s[30:31]
9400+
;
9401+
; GFX12-LABEL: v_permlanex16_i1:
9402+
; GFX12: ; %bb.0:
9403+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
9404+
; GFX12-NEXT: s_wait_expcnt 0x0
9405+
; GFX12-NEXT: s_wait_samplecnt 0x0
9406+
; GFX12-NEXT: s_wait_bvhcnt 0x0
9407+
; GFX12-NEXT: s_wait_kmcnt 0x0
9408+
; GFX12-NEXT: v_readfirstlane_b32 s0, v3
9409+
; GFX12-NEXT: v_readfirstlane_b32 s1, v4
9410+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
9411+
; GFX12-NEXT: v_permlanex16_b32 v2, v2, s0, s1
9412+
; GFX12-NEXT: v_and_b32_e32 v2, 1, v2
9413+
; GFX12-NEXT: global_store_b8 v[0:1], v2, off
9414+
; GFX12-NEXT: s_setpc_b64 s[30:31]
9415+
%v = call i1 @llvm.amdgcn.permlanex16.i1(i1 %src0, i1 %src0, i32 %src1, i32 %src2, i1 false, i1 false)
9416+
store i1 %v, ptr addrspace(1) %out
9417+
ret void
9418+
}

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane64.ll

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,33 @@ define amdgpu_kernel void @test_v(ptr addrspace(1) %out, i32 %src0) #1 {
5252
store i32 %v, ptr addrspace(1) %out
5353
ret void
5454
}
55+
56+
define void @test_i8(ptr addrspace(1) %out, i8 %src0) #1 {
57+
; GFX11-LABEL: test_i8:
58+
; GFX11: ; %bb.0:
59+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
60+
; GFX11-NEXT: v_permlane64_b32 v2, v2
61+
; GFX11-NEXT: global_store_b8 v[0:1], v2, off
62+
; GFX11-NEXT: s_setpc_b64 s[30:31]
63+
%v = call i8 @llvm.amdgcn.permlane64.i8(i8 %src0)
64+
store i8 %v, ptr addrspace(1) %out
65+
ret void
66+
}
67+
68+
define void @test_i1(ptr addrspace(1) %out, i1 %src0) #1 {
69+
; GFX11-LABEL: test_i1:
70+
; GFX11: ; %bb.0:
71+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
72+
; GFX11-NEXT: v_permlane64_b32 v2, v2
73+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
74+
; GFX11-NEXT: v_and_b32_e32 v2, 1, v2
75+
; GFX11-NEXT: global_store_b8 v[0:1], v2, off
76+
; GFX11-NEXT: s_setpc_b64 s[30:31]
77+
%v = call i1 @llvm.amdgcn.permlane64.i1(i1 %src0)
78+
store i1 %v, ptr addrspace(1) %out
79+
ret void
80+
}
81+
5582
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
5683
; GFX11-GISEL: {{.*}}
5784
; GFX11-SDAG: {{.*}}

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -700,3 +700,51 @@ define void @test_readfirstlane_v8i16(ptr addrspace(1) %out, <8 x i16> %src) {
700700
call void asm sideeffect "; use $0", "s"(<8 x i16> %x)
701701
ret void
702702
}
703+
704+
define void @dpp8_i8(i8 %in, ptr addrspace(1) %out) {
705+
; CHECK-SDAG-LABEL: dpp8_i8:
706+
; CHECK-SDAG: ; %bb.0:
707+
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
708+
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v0
709+
; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s4
710+
; CHECK-SDAG-NEXT: flat_store_byte v[1:2], v0
711+
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0)
712+
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
713+
;
714+
; CHECK-GISEL-LABEL: dpp8_i8:
715+
; CHECK-GISEL: ; %bb.0:
716+
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
717+
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v0
718+
; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s4
719+
; CHECK-GISEL-NEXT: flat_store_byte v[1:2], v0
720+
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0)
721+
; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
722+
%tmp0 = call i8 @llvm.amdgcn.readfirstlane.i8(i8 %in)
723+
store i8 %tmp0, ptr addrspace(1) %out
724+
ret void
725+
}
726+
727+
define void @dpp8_i1(i1 %in, ptr addrspace(1) %out) {
728+
; CHECK-SDAG-LABEL: dpp8_i1:
729+
; CHECK-SDAG: ; %bb.0:
730+
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
731+
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v0
732+
; CHECK-SDAG-NEXT: s_and_b32 s4, s4, 1
733+
; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s4
734+
; CHECK-SDAG-NEXT: flat_store_byte v[1:2], v0
735+
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0)
736+
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
737+
;
738+
; CHECK-GISEL-LABEL: dpp8_i1:
739+
; CHECK-GISEL: ; %bb.0:
740+
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
741+
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v0
742+
; CHECK-GISEL-NEXT: s_and_b32 s4, s4, 1
743+
; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s4
744+
; CHECK-GISEL-NEXT: flat_store_byte v[1:2], v0
745+
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0)
746+
; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
747+
%tmp0 = call i1 @llvm.amdgcn.readfirstlane.i1(i1 %in)
748+
store i1 %tmp0, ptr addrspace(1) %out
749+
ret void
750+
}

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -894,6 +894,54 @@ define void @test_readlane_v8i16(ptr addrspace(1) %out, <8 x i16> %src, i32 %src
894894
ret void
895895
}
896896

897+
define void @dpp8_i8(i8 %in, ptr addrspace(1) %out) {
898+
; CHECK-SDAG-LABEL: dpp8_i8:
899+
; CHECK-SDAG: ; %bb.0:
900+
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
901+
; CHECK-SDAG-NEXT: v_readlane_b32 s4, v0, 1
902+
; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s4
903+
; CHECK-SDAG-NEXT: flat_store_byte v[1:2], v0
904+
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0)
905+
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
906+
;
907+
; CHECK-GISEL-LABEL: dpp8_i8:
908+
; CHECK-GISEL: ; %bb.0:
909+
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
910+
; CHECK-GISEL-NEXT: v_readlane_b32 s4, v0, 1
911+
; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s4
912+
; CHECK-GISEL-NEXT: flat_store_byte v[1:2], v0
913+
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0)
914+
; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
915+
%tmp0 = call i8 @llvm.amdgcn.readlane.i8(i8 %in, i32 1)
916+
store i8 %tmp0, ptr addrspace(1) %out
917+
ret void
918+
}
919+
920+
define void @dpp8_i1(i1 %in, ptr addrspace(1) %out) {
921+
; CHECK-SDAG-LABEL: dpp8_i1:
922+
; CHECK-SDAG: ; %bb.0:
923+
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
924+
; CHECK-SDAG-NEXT: v_readlane_b32 s4, v0, 1
925+
; CHECK-SDAG-NEXT: s_and_b32 s4, s4, 1
926+
; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s4
927+
; CHECK-SDAG-NEXT: flat_store_byte v[1:2], v0
928+
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0)
929+
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
930+
;
931+
; CHECK-GISEL-LABEL: dpp8_i1:
932+
; CHECK-GISEL: ; %bb.0:
933+
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
934+
; CHECK-GISEL-NEXT: v_readlane_b32 s4, v0, 1
935+
; CHECK-GISEL-NEXT: s_and_b32 s4, s4, 1
936+
; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s4
937+
; CHECK-GISEL-NEXT: flat_store_byte v[1:2], v0
938+
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0)
939+
; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
940+
%tmp0 = call i1 @llvm.amdgcn.readlane.i1(i1 %in, i32 1)
941+
store i1 %tmp0, ptr addrspace(1) %out
942+
ret void
943+
}
944+
897945
declare i32 @llvm.amdgcn.workitem.id.x() #2
898946

899947
attributes #0 = { nounwind readnone convergent }

0 commit comments

Comments
 (0)