11; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
22; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck %s
33
4+ @lds = external local_unnamed_addr addrspace (3 ) global [4 x float ], align 4
5+
46; Expect readfirstlane to pick the 32-bit register.
5- define amdgpu_gs i32 @vgpr16_copyto_sgpr (ptr addrspace ( 3 ) %a , i32 %b , ptr addrspace ( 1 ) %out ) {
7+ define amdgpu_gs i32 @vgpr16_copyto_sgpr () {
68; CHECK-LABEL: vgpr16_copyto_sgpr:
79; CHECK: ; %bb.0: ; %entry
10+ ; CHECK-NEXT: v_mov_b32_e32 v0, lds@abs32@lo
811; CHECK-NEXT: ds_load_2addr_b32 v[0:1], v0 offset1:1
912; CHECK-NEXT: s_waitcnt lgkmcnt(0)
1013; CHECK-NEXT: v_cvt_f16_f32_e32 v0.l, v0
@@ -23,15 +26,15 @@ define amdgpu_gs i32 @vgpr16_copyto_sgpr(ptr addrspace(3) %a, i32 %b, ptr addrsp
2326; CHECK-NEXT: s_branch .LBB0_3
2427; CHECK-NEXT: .LBB0_3:
2528entry:
26- %1 = load <4 x float >, ptr addrspace (3 ) poison , align 4
27- %2 = extractelement <4 x float > %1 , i32 0
28- %3 = fptrunc float %2 to half
29- %4 = bitcast half %3 to i16
30- %5 = zext i16 %4 to i32
31- %6 = add i32 %5 , 1
32- %7 = mul i32 %6 , 5
33- %8 = icmp eq i32 %7 , 7
34- br i1 %8 , label %a1 , label %a2
29+ %ptr = load <4 x float >, ptr addrspace (3 ) @lds , align 4
30+ %f = extractelement <4 x float > %ptr , i32 0
31+ %half = fptrunc float %f to half
32+ %i16 = bitcast half %half to i16
33+ %i32 = zext i16 %i16 to i32
34+ %add = add i32 %i32 , 1
35+ %mul = mul i32 %add , 5
36+ %icmp = icmp eq i32 %mul , 7
37+ br i1 %icmp , label %a1 , label %a2
3538
3639a1:
3740 ret i32 1
0 commit comments