@@ -239,6 +239,140 @@ main_body:
239239 ret bfloat %res
240240}
241241
; An f32 image.sample.lz.2d result is packed with cvt.pkrtz (second operand is
; the constant 0.0) and only element 0 of the packed <2 x half> is read. That
; half value then feeds fmul/fdiv/fadd carrying reassoc/arcp/contract/afn flags.
; Under the GFX7 prefix the checks expect the f32 sample, the pkrtz call and the
; extractelement to remain as written. Under the GFX81PLUS prefix the checks
; expect a half-returning image.sample.lz.2d call feeding the arithmetic
; directly, with no pkrtz or extractelement left.
define amdgpu_ps float @image_sample_2d_single_pkrtz_to_d16(<8 x i32> %surf_desc, <4 x i32> %samp, float %u, float %v) {
; GFX7-LABEL: @image_sample_2d_single_pkrtz_to_d16(
; GFX7-NEXT:  main_body:
; GFX7-NEXT:    [[SAMPLE:%.*]] = call float @llvm.amdgcn.image.sample.lz.2d.f32.f32.v8i32.v4i32(i32 1, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; GFX7-NEXT:    [[PACK:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float [[SAMPLE]], float 0.000000e+00)
; GFX7-NEXT:    [[H0:%.*]] = extractelement <2 x half> [[PACK]], i64 0
; GFX7-NEXT:    [[MUL:%.*]] = fmul reassoc arcp contract afn half [[H0]], [[H0]]
; GFX7-NEXT:    [[DIV:%.*]] = fdiv reassoc arcp contract afn half [[MUL]], [[H0]]
; GFX7-NEXT:    [[ADD:%.*]] = fadd reassoc arcp contract afn half [[DIV]], [[H0]]
; GFX7-NEXT:    [[RES:%.*]] = fpext half [[ADD]] to float
; GFX7-NEXT:    ret float [[RES]]
;
; GFX81PLUS-LABEL: @image_sample_2d_single_pkrtz_to_d16(
; GFX81PLUS-NEXT:  main_body:
; GFX81PLUS-NEXT:    [[SAMPLE:%.*]] = call half @llvm.amdgcn.image.sample.lz.2d.f16.f32.v8i32.v4i32(i32 1, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; GFX81PLUS-NEXT:    [[MUL:%.*]] = fmul reassoc arcp contract afn half [[SAMPLE]], [[SAMPLE]]
; GFX81PLUS-NEXT:    [[DIV:%.*]] = fdiv reassoc arcp contract afn half [[MUL]], [[SAMPLE]]
; GFX81PLUS-NEXT:    [[ADD:%.*]] = fadd reassoc arcp contract afn half [[DIV]], [[SAMPLE]]
; GFX81PLUS-NEXT:    [[RES:%.*]] = fpext half [[ADD]] to float
; GFX81PLUS-NEXT:    ret float [[RES]]
;
main_body:
  %sample = call float @llvm.amdgcn.image.sample.lz.2d.f32.f32.v8i32.v4i32(i32 1, float %u, float %v, <8 x i32> %surf_desc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  %pack = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %sample, float 0.000000e+00)
  %h0 = extractelement <2 x half> %pack, i64 0
  %mul = fmul reassoc arcp contract afn half %h0, %h0
  %div = fdiv reassoc arcp contract afn half %mul, %h0
  %add = fadd reassoc arcp contract afn half %div, %h0
  %res = fpext half %add to float
  ret float %res
}
273+
; The f32 image.sample.lz.2d result is packed with cvt.pkrtz together with the
; non-constant %v, and both elements of the packed <2 x half> are read. The
; fmul/fadd here carry no fast-math flags. The checks for both prefixes expect
; the sequence to come out exactly as written (f32 sample, pkrtz, two
; extractelements).
define amdgpu_ps float @image_sample_2d_pkrtz_variable_no_d16(<8 x i32> %surf_desc, <4 x i32> %samp, float %u, float %v) {
; GFX7-LABEL: @image_sample_2d_pkrtz_variable_no_d16(
; GFX7-NEXT:  main_body:
; GFX7-NEXT:    [[SAMPLE:%.*]] = call float @llvm.amdgcn.image.sample.lz.2d.f32.f32.v8i32.v4i32(i32 1, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; GFX7-NEXT:    [[PACK:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float [[SAMPLE]], float [[V]])
; GFX7-NEXT:    [[H0:%.*]] = extractelement <2 x half> [[PACK]], i64 0
; GFX7-NEXT:    [[H1:%.*]] = extractelement <2 x half> [[PACK]], i64 1
; GFX7-NEXT:    [[MUL:%.*]] = fmul half [[H0]], [[H1]]
; GFX7-NEXT:    [[ADD:%.*]] = fadd half [[MUL]], [[H0]]
; GFX7-NEXT:    [[RES:%.*]] = fpext half [[ADD]] to float
; GFX7-NEXT:    ret float [[RES]]
;
; GFX81PLUS-LABEL: @image_sample_2d_pkrtz_variable_no_d16(
; GFX81PLUS-NEXT:  main_body:
; GFX81PLUS-NEXT:    [[SAMPLE:%.*]] = call float @llvm.amdgcn.image.sample.lz.2d.f32.f32.v8i32.v4i32(i32 1, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; GFX81PLUS-NEXT:    [[PACK:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float [[SAMPLE]], float [[V]])
; GFX81PLUS-NEXT:    [[H0:%.*]] = extractelement <2 x half> [[PACK]], i64 0
; GFX81PLUS-NEXT:    [[H1:%.*]] = extractelement <2 x half> [[PACK]], i64 1
; GFX81PLUS-NEXT:    [[MUL:%.*]] = fmul half [[H0]], [[H1]]
; GFX81PLUS-NEXT:    [[ADD:%.*]] = fadd half [[MUL]], [[H0]]
; GFX81PLUS-NEXT:    [[RES:%.*]] = fpext half [[ADD]] to float
; GFX81PLUS-NEXT:    ret float [[RES]]
;
main_body:
  %sample = call float @llvm.amdgcn.image.sample.lz.2d.f32.f32.v8i32.v4i32(i32 1, float %u, float %v, <8 x i32> %surf_desc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  %pack = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %sample, float %v)
  %h0 = extractelement <2 x half> %pack, i64 0
  %h1 = extractelement <2 x half> %pack, i64 1
  %mul = fmul half %h0, %h1
  %add = fadd half %mul, %h0
  %res = fpext half %add to float
  ret float %res
}
307+
; The f32 image.sample.lz.2d result is packed with cvt.pkrtz whose second
; operand is the constant 0.0, but unlike the single-lane case both element 0
; and element 1 of the packed <2 x half> are read. The fmul/fadd carry no
; fast-math flags. The checks for both prefixes expect the sequence to come
; out exactly as written (f32 sample, pkrtz, two extractelements).
define amdgpu_ps float @image_sample_2d_pkrtz_constant_no_fold(<8 x i32> %surf_desc, <4 x i32> %samp, float %u, float %v) {
; GFX7-LABEL: @image_sample_2d_pkrtz_constant_no_fold(
; GFX7-NEXT:  main_body:
; GFX7-NEXT:    [[SAMPLE:%.*]] = call float @llvm.amdgcn.image.sample.lz.2d.f32.f32.v8i32.v4i32(i32 1, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; GFX7-NEXT:    [[PACK:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float [[SAMPLE]], float 0.000000e+00)
; GFX7-NEXT:    [[H0:%.*]] = extractelement <2 x half> [[PACK]], i64 0
; GFX7-NEXT:    [[H1:%.*]] = extractelement <2 x half> [[PACK]], i64 1
; GFX7-NEXT:    [[MUL:%.*]] = fmul half [[H0]], [[H1]]
; GFX7-NEXT:    [[ADD:%.*]] = fadd half [[MUL]], [[H0]]
; GFX7-NEXT:    [[RES:%.*]] = fpext half [[ADD]] to float
; GFX7-NEXT:    ret float [[RES]]
;
; GFX81PLUS-LABEL: @image_sample_2d_pkrtz_constant_no_fold(
; GFX81PLUS-NEXT:  main_body:
; GFX81PLUS-NEXT:    [[SAMPLE:%.*]] = call float @llvm.amdgcn.image.sample.lz.2d.f32.f32.v8i32.v4i32(i32 1, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; GFX81PLUS-NEXT:    [[PACK:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float [[SAMPLE]], float 0.000000e+00)
; GFX81PLUS-NEXT:    [[H0:%.*]] = extractelement <2 x half> [[PACK]], i64 0
; GFX81PLUS-NEXT:    [[H1:%.*]] = extractelement <2 x half> [[PACK]], i64 1
; GFX81PLUS-NEXT:    [[MUL:%.*]] = fmul half [[H0]], [[H1]]
; GFX81PLUS-NEXT:    [[ADD:%.*]] = fadd half [[MUL]], [[H0]]
; GFX81PLUS-NEXT:    [[RES:%.*]] = fpext half [[ADD]] to float
; GFX81PLUS-NEXT:    ret float [[RES]]
;
main_body:
  %sample = call float @llvm.amdgcn.image.sample.lz.2d.f32.f32.v8i32.v4i32(i32 1, float %u, float %v, <8 x i32> %surf_desc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  %pack = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %sample, float 0.000000e+00)
  %h0 = extractelement <2 x half> %pack, i64 0
  %h1 = extractelement <2 x half> %pack, i64 1
  %mul = fmul half %h0, %h1
  %add = fadd half %mul, %h0
  %res = fpext half %add to float
  ret float %res
}
341+
; Mirror of the single-lane case with the lanes swapped: the f32
; image.sample.lz.2d result is the SECOND cvt.pkrtz operand (the first is the
; constant 0.0) and only element 1 of the packed <2 x half> is read. The
; arithmetic carries reassoc/arcp/contract/afn flags. The checks for both
; prefixes expect the sequence to come out exactly as written, i.e. the sample
; call keeps its f32 return type and the pkrtz/extractelement pair remains.
define amdgpu_ps float @image_sample_2d_single_pkrtz_high_no_d16(<8 x i32> %surf_desc, <4 x i32> %samp, float %u, float %v) {
; GFX7-LABEL: @image_sample_2d_single_pkrtz_high_no_d16(
; GFX7-NEXT:  main_body:
; GFX7-NEXT:    [[SAMPLE:%.*]] = call float @llvm.amdgcn.image.sample.lz.2d.f32.f32.v8i32.v4i32(i32 1, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; GFX7-NEXT:    [[PACK:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 0.000000e+00, float [[SAMPLE]])
; GFX7-NEXT:    [[H0:%.*]] = extractelement <2 x half> [[PACK]], i64 1
; GFX7-NEXT:    [[MUL:%.*]] = fmul reassoc arcp contract afn half [[H0]], [[H0]]
; GFX7-NEXT:    [[DIV:%.*]] = fdiv reassoc arcp contract afn half [[MUL]], [[H0]]
; GFX7-NEXT:    [[ADD:%.*]] = fadd reassoc arcp contract afn half [[DIV]], [[H0]]
; GFX7-NEXT:    [[RES:%.*]] = fpext half [[ADD]] to float
; GFX7-NEXT:    ret float [[RES]]
;
; GFX81PLUS-LABEL: @image_sample_2d_single_pkrtz_high_no_d16(
; GFX81PLUS-NEXT:  main_body:
; GFX81PLUS-NEXT:    [[SAMPLE:%.*]] = call float @llvm.amdgcn.image.sample.lz.2d.f32.f32.v8i32.v4i32(i32 1, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; GFX81PLUS-NEXT:    [[PACK:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 0.000000e+00, float [[SAMPLE]])
; GFX81PLUS-NEXT:    [[H0:%.*]] = extractelement <2 x half> [[PACK]], i64 1
; GFX81PLUS-NEXT:    [[MUL:%.*]] = fmul reassoc arcp contract afn half [[H0]], [[H0]]
; GFX81PLUS-NEXT:    [[DIV:%.*]] = fdiv reassoc arcp contract afn half [[MUL]], [[H0]]
; GFX81PLUS-NEXT:    [[ADD:%.*]] = fadd reassoc arcp contract afn half [[DIV]], [[H0]]
; GFX81PLUS-NEXT:    [[RES:%.*]] = fpext half [[ADD]] to float
; GFX81PLUS-NEXT:    ret float [[RES]]
;
main_body:
  %sample = call float @llvm.amdgcn.image.sample.lz.2d.f32.f32.v8i32.v4i32(i32 1, float %u, float %v, <8 x i32> %surf_desc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  %pack = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 0.000000e+00, float %sample)
  %h0 = extractelement <2 x half> %pack, i64 1
  %mul = fmul reassoc arcp contract afn half %h0, %h0
  %div = fdiv reassoc arcp contract afn half %mul, %h0
  %add = fadd reassoc arcp contract afn half %div, %h0
  %res = fpext half %add to float
  ret float %res
}
375+
242376define amdgpu_ps half @image_gather4_2d_v4f32 (<8 x i32 > inreg %rsrc , <4 x i32 > inreg %samp , half %s , half %t ) {
243377; GFX7-LABEL: @image_gather4_2d_v4f32(
244378; GFX7-NEXT: main_body:
0 commit comments