@@ -122,7 +122,7 @@ main_body:
122122 ret half %addf_sum.2
123123}
124124
125- define void @image_sample_2d_multi_fptrunc_to_d16 (<8 x i32 > %surf_desc , <4 x i32 > %samp , float %u , float %v , ptr addrspace ( 7 ) %out ) {
125+ define amdgpu_ps half @image_sample_2d_multi_fptrunc_to_d16 (<8 x i32 > %surf_desc , <4 x i32 > %samp , float %u , float %v ) {
126126; GFX7-LABEL: @image_sample_2d_multi_fptrunc_to_d16(
127127; GFX7-NEXT: main_body:
128128; GFX7-NEXT: [[SAMPLE:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32.v8i32.v4i32(i32 7, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
@@ -134,8 +134,7 @@ define void @image_sample_2d_multi_fptrunc_to_d16(<8 x i32> %surf_desc, <4 x i32
134134; GFX7-NEXT: [[H2:%.*]] = fptrunc float [[E2]] to half
135135; GFX7-NEXT: [[MUL:%.*]] = fmul half [[H0]], [[H1]]
136136; GFX7-NEXT: [[RES:%.*]] = fadd half [[MUL]], [[H2]]
137- ; GFX7-NEXT: store half [[RES]], ptr addrspace(7) [[OUT:%.*]], align 2
138- ; GFX7-NEXT: ret void
137+ ; GFX7-NEXT: ret half [[RES]]
139138;
140139; GFX81PLUS-LABEL: @image_sample_2d_multi_fptrunc_to_d16(
141140; GFX81PLUS-NEXT: main_body:
@@ -148,8 +147,7 @@ define void @image_sample_2d_multi_fptrunc_to_d16(<8 x i32> %surf_desc, <4 x i32
148147; GFX81PLUS-NEXT: [[H2:%.*]] = fptrunc float [[E2]] to half
149148; GFX81PLUS-NEXT: [[MUL:%.*]] = fmul half [[H0]], [[H1]]
150149; GFX81PLUS-NEXT: [[RES:%.*]] = fadd half [[MUL]], [[H2]]
151- ; GFX81PLUS-NEXT: store half [[RES]], ptr addrspace(7) [[OUT:%.*]], align 2
152- ; GFX81PLUS-NEXT: ret void
150+ ; GFX81PLUS-NEXT: ret half [[RES]]
153151;
154152main_body:
155153 %sample = call <4 x float > @llvm.amdgcn.image.sample.lz.2d.v4f32.f32.v8i32.v4i32 (i32 15 , float %u , float %v , <8 x i32 > %surf_desc , <4 x i32 > %samp , i1 false , i32 0 , i32 0 )
@@ -161,8 +159,7 @@ main_body:
161159 %h2 = fptrunc float %e2 to half
162160 %mul = fmul half %h0 , %h1
163161 %res = fadd half %mul , %h2
164- store half %res , ptr addrspace (7 ) %out , align 2
165- ret void
162+ ret half %res
166163}
167164
168165define amdgpu_ps half @image_gather4_2d_v4f32 (<8 x i32 > inreg %rsrc , <4 x i32 > inreg %samp , half %s , half %t ) {
@@ -444,4 +441,3 @@ declare <3 x float> @llvm.amdgcn.image.msaa.load.x.2dmsaa.v3f32.i32(i32, i32, i3
444441declare <4 x float > @llvm.amdgcn.image.msaa.load.x.2dmsaa.v4f32.i32 (i32 , i32 , i32 , i32 , <8 x i32 >, i32 , i32 ) #0
445442
446443attributes #0 = { nounwind readonly willreturn}
447-
0 commit comments