@@ -239,6 +239,140 @@ main_body:
239
239
ret bfloat %res
240
240
}
241
241
242
+ ; An f32 image sample feeds the first operand of cvt.pkrtz (second operand is
+ ; the constant 0.0), only lane 0 of the packed result is extracted, and every
+ ; half-precision user carries fast-math flags.
+ ; The assertions for the GFX81PLUS prefix expect the pack/extract pair to be
+ ; replaced by a half-returning sample; the assertions for the GFX7 prefix
+ ; expect the sequence to stay exactly as written.
+ define amdgpu_ps float @image_sample_2d_single_pkrtz_to_d16(<8 x i32> %surf_desc, <4 x i32> %samp, float %u, float %v) {
243
+ ; GFX7-LABEL: @image_sample_2d_single_pkrtz_to_d16(
244
+ ; GFX7-NEXT: main_body:
245
+ ; GFX7-NEXT: [[SAMPLE:%.*]] = call float @llvm.amdgcn.image.sample.lz.2d.f32.f32.v8i32.v4i32(i32 1, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
246
+ ; GFX7-NEXT: [[PACK:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float [[SAMPLE]], float 0.000000e+00)
247
+ ; GFX7-NEXT: [[H0:%.*]] = extractelement <2 x half> [[PACK]], i64 0
248
+ ; GFX7-NEXT: [[MUL:%.*]] = fmul reassoc arcp contract afn half [[H0]], [[H0]]
249
+ ; GFX7-NEXT: [[DIV:%.*]] = fdiv reassoc arcp contract afn half [[MUL]], [[H0]]
250
+ ; GFX7-NEXT: [[ADD:%.*]] = fadd reassoc arcp contract afn half [[DIV]], [[H0]]
251
+ ; GFX7-NEXT: [[RES:%.*]] = fpext half [[ADD]] to float
252
+ ; GFX7-NEXT: ret float [[RES]]
253
+ ;
254
+ ; GFX81PLUS-LABEL: @image_sample_2d_single_pkrtz_to_d16(
255
+ ; GFX81PLUS-NEXT: main_body:
256
+ ; GFX81PLUS-NEXT: [[SAMPLE:%.*]] = call half @llvm.amdgcn.image.sample.lz.2d.f16.f32.v8i32.v4i32(i32 1, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
257
+ ; GFX81PLUS-NEXT: [[MUL:%.*]] = fmul reassoc arcp contract afn half [[SAMPLE]], [[SAMPLE]]
258
+ ; GFX81PLUS-NEXT: [[DIV:%.*]] = fdiv reassoc arcp contract afn half [[MUL]], [[SAMPLE]]
259
+ ; GFX81PLUS-NEXT: [[ADD:%.*]] = fadd reassoc arcp contract afn half [[DIV]], [[SAMPLE]]
260
+ ; GFX81PLUS-NEXT: [[RES:%.*]] = fpext half [[ADD]] to float
261
+ ; GFX81PLUS-NEXT: ret float [[RES]]
262
+ ;
263
+ main_body:
264
+ %sample = call float @llvm.amdgcn.image.sample.lz.2d.f32.f32.v8i32.v4i32(i32 1, float %u, float %v, <8 x i32> %surf_desc, <4 x i32> %samp, i1 false, i32 0, i32 0)
265
+ %pack = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %sample, float 0.000000e+00)
266
+ %h0 = extractelement <2 x half> %pack, i64 0
267
+ %mul = fmul reassoc arcp contract afn half %h0, %h0
268
+ %div = fdiv reassoc arcp contract afn half %mul, %h0
269
+ %add = fadd reassoc arcp contract afn half %div, %h0
270
+ %res = fpext half %add to float
271
+ ret float %res
272
+ }
273
+
274
+ ; Negative case: the second cvt.pkrtz operand is the function argument %v
+ ; rather than a constant, both lanes of the packed result are extracted, and
+ ; the half-precision arithmetic carries no fast-math flags.
+ ; The assertions for both prefixes expect the f32 sample, the pack and both
+ ; extracts to remain unchanged.
+ ; NOTE(review): which of these properties actually blocks the fold is not
+ ; visible from this test alone - confirm against the combine.
+ define amdgpu_ps float @image_sample_2d_pkrtz_variable_no_d16 (<8 x i32 > %surf_desc , <4 x i32 > %samp , float %u , float %v ) {
275
+ ; GFX7-LABEL: @image_sample_2d_pkrtz_variable_no_d16(
276
+ ; GFX7-NEXT: main_body:
277
+ ; GFX7-NEXT: [[SAMPLE:%.*]] = call float @llvm.amdgcn.image.sample.lz.2d.f32.f32.v8i32.v4i32(i32 1, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
278
+ ; GFX7-NEXT: [[PACK:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float [[SAMPLE]], float [[V]])
279
+ ; GFX7-NEXT: [[H0:%.*]] = extractelement <2 x half> [[PACK]], i64 0
280
+ ; GFX7-NEXT: [[H1:%.*]] = extractelement <2 x half> [[PACK]], i64 1
281
+ ; GFX7-NEXT: [[MUL:%.*]] = fmul half [[H0]], [[H1]]
282
+ ; GFX7-NEXT: [[ADD:%.*]] = fadd half [[MUL]], [[H0]]
283
+ ; GFX7-NEXT: [[RES:%.*]] = fpext half [[ADD]] to float
284
+ ; GFX7-NEXT: ret float [[RES]]
285
+ ;
286
+ ; GFX81PLUS-LABEL: @image_sample_2d_pkrtz_variable_no_d16(
287
+ ; GFX81PLUS-NEXT: main_body:
288
+ ; GFX81PLUS-NEXT: [[SAMPLE:%.*]] = call float @llvm.amdgcn.image.sample.lz.2d.f32.f32.v8i32.v4i32(i32 1, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
289
+ ; GFX81PLUS-NEXT: [[PACK:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float [[SAMPLE]], float [[V]])
290
+ ; GFX81PLUS-NEXT: [[H0:%.*]] = extractelement <2 x half> [[PACK]], i64 0
291
+ ; GFX81PLUS-NEXT: [[H1:%.*]] = extractelement <2 x half> [[PACK]], i64 1
292
+ ; GFX81PLUS-NEXT: [[MUL:%.*]] = fmul half [[H0]], [[H1]]
293
+ ; GFX81PLUS-NEXT: [[ADD:%.*]] = fadd half [[MUL]], [[H0]]
294
+ ; GFX81PLUS-NEXT: [[RES:%.*]] = fpext half [[ADD]] to float
295
+ ; GFX81PLUS-NEXT: ret float [[RES]]
296
+ ;
297
+ main_body:
298
+ %sample = call float @llvm.amdgcn.image.sample.lz.2d.f32.f32.v8i32.v4i32 (i32 1 , float %u , float %v , <8 x i32 > %surf_desc , <4 x i32 > %samp , i1 false , i32 0 , i32 0 )
299
+ %pack = call <2 x half > @llvm.amdgcn.cvt.pkrtz (float %sample , float %v )
300
+ %h0 = extractelement <2 x half > %pack , i64 0
301
+ %h1 = extractelement <2 x half > %pack , i64 1
302
+ %mul = fmul half %h0 , %h1
303
+ %add = fadd half %mul , %h0
304
+ %res = fpext half %add to float
305
+ ret float %res
306
+ }
307
+
308
+ ; Negative case: the second cvt.pkrtz operand is the constant 0.0, but both
+ ; lanes of the packed result are extracted and the half-precision arithmetic
+ ; carries no fast-math flags.
+ ; The assertions for both prefixes expect the f32 sample, the pack and both
+ ; extracts to remain unchanged.
+ ; NOTE(review): presumably the use of lane 1 and/or the missing fast-math
+ ; flags is what blocks the fold - confirm against the combine.
+ define amdgpu_ps float @image_sample_2d_pkrtz_constant_no_fold(<8 x i32> %surf_desc, <4 x i32> %samp, float %u, float %v) {
309
+ ; GFX7-LABEL: @image_sample_2d_pkrtz_constant_no_fold(
310
+ ; GFX7-NEXT: main_body:
311
+ ; GFX7-NEXT: [[SAMPLE:%.*]] = call float @llvm.amdgcn.image.sample.lz.2d.f32.f32.v8i32.v4i32(i32 1, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
312
+ ; GFX7-NEXT: [[PACK:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float [[SAMPLE]], float 0.000000e+00)
313
+ ; GFX7-NEXT: [[H0:%.*]] = extractelement <2 x half> [[PACK]], i64 0
314
+ ; GFX7-NEXT: [[H1:%.*]] = extractelement <2 x half> [[PACK]], i64 1
315
+ ; GFX7-NEXT: [[MUL:%.*]] = fmul half [[H0]], [[H1]]
316
+ ; GFX7-NEXT: [[ADD:%.*]] = fadd half [[MUL]], [[H0]]
317
+ ; GFX7-NEXT: [[RES:%.*]] = fpext half [[ADD]] to float
318
+ ; GFX7-NEXT: ret float [[RES]]
319
+ ;
320
+ ; GFX81PLUS-LABEL: @image_sample_2d_pkrtz_constant_no_fold(
321
+ ; GFX81PLUS-NEXT: main_body:
322
+ ; GFX81PLUS-NEXT: [[SAMPLE:%.*]] = call float @llvm.amdgcn.image.sample.lz.2d.f32.f32.v8i32.v4i32(i32 1, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
323
+ ; GFX81PLUS-NEXT: [[PACK:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float [[SAMPLE]], float 0.000000e+00)
324
+ ; GFX81PLUS-NEXT: [[H0:%.*]] = extractelement <2 x half> [[PACK]], i64 0
325
+ ; GFX81PLUS-NEXT: [[H1:%.*]] = extractelement <2 x half> [[PACK]], i64 1
326
+ ; GFX81PLUS-NEXT: [[MUL:%.*]] = fmul half [[H0]], [[H1]]
327
+ ; GFX81PLUS-NEXT: [[ADD:%.*]] = fadd half [[MUL]], [[H0]]
328
+ ; GFX81PLUS-NEXT: [[RES:%.*]] = fpext half [[ADD]] to float
329
+ ; GFX81PLUS-NEXT: ret float [[RES]]
330
+ ;
331
+ main_body:
332
+ %sample = call float @llvm.amdgcn.image.sample.lz.2d.f32.f32.v8i32.v4i32(i32 1, float %u, float %v, <8 x i32> %surf_desc, <4 x i32> %samp, i1 false, i32 0, i32 0)
333
+ %pack = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %sample, float 0.000000e+00)
334
+ %h0 = extractelement <2 x half> %pack, i64 0
335
+ %h1 = extractelement <2 x half> %pack, i64 1
336
+ %mul = fmul half %h0, %h1
337
+ %add = fadd half %mul, %h0
338
+ %res = fpext half %add to float
339
+ ret float %res
340
+ }
341
+
342
+ ; Negative case: the f32 sample is the SECOND cvt.pkrtz operand (the first is
+ ; the constant 0.0) and only lane 1 of the packed result is extracted; the
+ ; half-precision users carry fast-math flags.
+ ; The assertions for both prefixes expect the f32 sample, the pack and the
+ ; lane-1 extract to remain unchanged.
+ define amdgpu_ps float @image_sample_2d_single_pkrtz_high_no_d16(<8 x i32> %surf_desc, <4 x i32> %samp, float %u, float %v) {
343
+ ; GFX7-LABEL: @image_sample_2d_single_pkrtz_high_no_d16(
344
+ ; GFX7-NEXT: main_body:
345
+ ; GFX7-NEXT: [[SAMPLE:%.*]] = call float @llvm.amdgcn.image.sample.lz.2d.f32.f32.v8i32.v4i32(i32 1, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
346
+ ; GFX7-NEXT: [[PACK:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 0.000000e+00, float [[SAMPLE]])
347
+ ; GFX7-NEXT: [[H0:%.*]] = extractelement <2 x half> [[PACK]], i64 1
348
+ ; GFX7-NEXT: [[MUL:%.*]] = fmul reassoc arcp contract afn half [[H0]], [[H0]]
349
+ ; GFX7-NEXT: [[DIV:%.*]] = fdiv reassoc arcp contract afn half [[MUL]], [[H0]]
350
+ ; GFX7-NEXT: [[ADD:%.*]] = fadd reassoc arcp contract afn half [[DIV]], [[H0]]
351
+ ; GFX7-NEXT: [[RES:%.*]] = fpext half [[ADD]] to float
352
+ ; GFX7-NEXT: ret float [[RES]]
353
+ ;
354
+ ; GFX81PLUS-LABEL: @image_sample_2d_single_pkrtz_high_no_d16(
355
+ ; GFX81PLUS-NEXT: main_body:
356
+ ; GFX81PLUS-NEXT: [[SAMPLE:%.*]] = call float @llvm.amdgcn.image.sample.lz.2d.f32.f32.v8i32.v4i32(i32 1, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
357
+ ; GFX81PLUS-NEXT: [[PACK:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 0.000000e+00, float [[SAMPLE]])
358
+ ; GFX81PLUS-NEXT: [[H0:%.*]] = extractelement <2 x half> [[PACK]], i64 1
359
+ ; GFX81PLUS-NEXT: [[MUL:%.*]] = fmul reassoc arcp contract afn half [[H0]], [[H0]]
360
+ ; GFX81PLUS-NEXT: [[DIV:%.*]] = fdiv reassoc arcp contract afn half [[MUL]], [[H0]]
361
+ ; GFX81PLUS-NEXT: [[ADD:%.*]] = fadd reassoc arcp contract afn half [[DIV]], [[H0]]
362
+ ; GFX81PLUS-NEXT: [[RES:%.*]] = fpext half [[ADD]] to float
363
+ ; GFX81PLUS-NEXT: ret float [[RES]]
364
+ ;
365
+ main_body:
366
+ %sample = call float @llvm.amdgcn.image.sample.lz.2d.f32.f32.v8i32.v4i32(i32 1, float %u, float %v, <8 x i32> %surf_desc, <4 x i32> %samp, i1 false, i32 0, i32 0)
367
+ %pack = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 0.000000e+00, float %sample)
368
+ %h0 = extractelement <2 x half> %pack, i64 1
369
+ %mul = fmul reassoc arcp contract afn half %h0, %h0
370
+ %div = fdiv reassoc arcp contract afn half %mul, %h0
371
+ %add = fadd reassoc arcp contract afn half %div, %h0
372
+ %res = fpext half %add to float
373
+ ret float %res
374
+ }
375
+
242
376
define amdgpu_ps half @image_gather4_2d_v4f32 (<8 x i32 > inreg %rsrc , <4 x i32 > inreg %samp , half %s , half %t ) {
243
377
; GFX7-LABEL: @image_gather4_2d_v4f32(
244
378
; GFX7-NEXT: main_body:
0 commit comments