@@ -323,6 +323,135 @@ define amdgpu_ps void @v_test_max_v2bf16_vl(ptr addrspace(1) %out, <2 x bfloat>
323323 ret void
324324}
325325
; Scalar bfloat clamp to [0.0, 1.0], written as maxnum(src, 0.0) followed by
; minnum(., 1.0) on a plain (non-inreg) argument. The checks expect the pair
; to collapse into one packed max instruction carrying the clamp modifier,
; with the source read from v0.
define amdgpu_ps bfloat @test_clamp_bf16(bfloat %src) {
; GCN-LABEL: test_clamp_bf16:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_pk_max_num_bf16 v0, v0, v0 clamp
; GCN-NEXT:    ; return to shader part epilog
  %max = call bfloat @llvm.maxnum.bf16(bfloat %src, bfloat 0.0)
  %clamp = call bfloat @llvm.minnum.bf16(bfloat %max, bfloat 1.0)
  ret bfloat %clamp
}
335+
; Same scalar bfloat clamp pattern (maxnum with 0.0, then minnum with 1.0),
; but the argument is marked inreg. The checks expect a single clamped packed
; max instruction that reads the source directly from s0 and writes v0.
define amdgpu_ps bfloat @test_clamp_bf16_s(bfloat inreg %src) {
; GCN-LABEL: test_clamp_bf16_s:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_pk_max_num_bf16 v0, s0, s0 clamp
; GCN-NEXT:    ; return to shader part epilog
  %max = call bfloat @llvm.maxnum.bf16(bfloat %src, bfloat 0.0)
  %clamp = call bfloat @llvm.minnum.bf16(bfloat %max, bfloat 1.0)
  ret bfloat %clamp
}
345+
; Packed <2 x bfloat> clamp: both lanes go through maxnum with 0.0 and then
; minnum with 1.0. The result is bitcast to float so it can be returned from
; the shader. The checks expect one clamped packed max instruction on v0.
define amdgpu_ps float @test_clamp_v2bf16(<2 x bfloat> %src) {
; GCN-LABEL: test_clamp_v2bf16:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_pk_max_num_bf16 v0, v0, v0 clamp
; GCN-NEXT:    ; return to shader part epilog
  %max = call <2 x bfloat> @llvm.maxnum.v2bf16(<2 x bfloat> %src, <2 x bfloat> <bfloat 0.0, bfloat 0.0>)
  %clamp = call <2 x bfloat> @llvm.minnum.v2bf16(<2 x bfloat> %max, <2 x bfloat> <bfloat 1.0, bfloat 1.0>)
  %ret = bitcast <2 x bfloat> %clamp to float
  ret float %ret
}
356+
; Packed <2 x bfloat> clamp with an inreg argument. Same maxnum/minnum pattern
; as the non-inreg variant; the checks expect the single clamped packed max
; instruction to take its source from s0 and produce the result in v0.
define amdgpu_ps float @test_clamp_v2bf16_s(<2 x bfloat> inreg %src) {
; GCN-LABEL: test_clamp_v2bf16_s:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_pk_max_num_bf16 v0, s0, s0 clamp
; GCN-NEXT:    ; return to shader part epilog
  %max = call <2 x bfloat> @llvm.maxnum.v2bf16(<2 x bfloat> %src, <2 x bfloat> <bfloat 0.0, bfloat 0.0>)
  %clamp = call <2 x bfloat> @llvm.minnum.v2bf16(<2 x bfloat> %max, <2 x bfloat> <bfloat 1.0, bfloat 1.0>)
  %ret = bitcast <2 x bfloat> %clamp to float
  ret float %ret
}
367+
; Clamp applied to the result of another operation: exp2 on a scalar bfloat,
; then maxnum with 0.0 and minnum with 1.0. The checks expect the clamp to be
; folded into the producing instruction (v_exp_bf16_e64 ... clamp) with no
; separate max instruction emitted.
define amdgpu_ps bfloat @test_clamp_bf16_folding(bfloat %src) {
; GCN-LABEL: test_clamp_bf16_folding:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_exp_bf16_e64 v0, v0 clamp
; GCN-NEXT:    ; return to shader part epilog
  %exp = call bfloat @llvm.exp2.bf16(bfloat %src)
  %max = call bfloat @llvm.maxnum.bf16(bfloat %exp, bfloat 0.0)
  %clamp = call bfloat @llvm.minnum.bf16(bfloat %max, bfloat 1.0)
  ret bfloat %clamp
}
378+
; Packed variant of clamp folding: an fmul of two <2 x bfloat> values feeds
; the maxnum(0.0)/minnum(1.0) pair. The checks expect the clamp modifier to
; land on the multiply itself (v_pk_mul_bf16 ... clamp). The result is bitcast
; to float for the shader return.
define amdgpu_ps float @test_clamp_v2bf16_folding(<2 x bfloat> %src0, <2 x bfloat> %src1) {
; GCN-LABEL: test_clamp_v2bf16_folding:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_pk_mul_bf16 v0, v0, v1 clamp
; GCN-NEXT:    ; return to shader part epilog
  %mul = fmul <2 x bfloat> %src0, %src1
  %max = call <2 x bfloat> @llvm.maxnum.v2bf16(<2 x bfloat> %mul, <2 x bfloat> <bfloat 0.0, bfloat 0.0>)
  %clamp = call <2 x bfloat> @llvm.minnum.v2bf16(<2 x bfloat> %max, <2 x bfloat> <bfloat 1.0, bfloat 1.0>)
  %ret = bitcast <2 x bfloat> %clamp to float
  ret float %ret
}
390+
; fmul + fadd on <2 x bfloat>, both carrying the contract flag, with all three
; operands passed as plain (non-inreg) arguments. The checks expect the pair to
; be emitted as one v_pk_fma_bf16 on v2, v3, v4, followed by a 32-bit global
; store of the result through %out (v[0:1]).
define amdgpu_ps void @v_test_mul_add_v2bf16_vvv(ptr addrspace(1) %out, <2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> %c) {
; GCN-LABEL: v_test_mul_add_v2bf16_vvv:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_pk_fma_bf16 v2, v2, v3, v4
; GCN-NEXT:    global_store_b32 v[0:1], v2, off
; GCN-NEXT:    s_endpgm
  %mul = fmul contract <2 x bfloat> %a, %b
  %add = fadd contract <2 x bfloat> %mul, %c
  store <2 x bfloat> %add, ptr addrspace(1) %out
  ret void
}
402+
; Contracted fmul + fadd on <2 x bfloat> where %a is a plain argument and
; %b, %c are inreg. The checks expect a single v_pk_fma_bf16 that uses s0 and
; s1 directly as the second and third sources, with no copies beforehand.
define amdgpu_ps void @v_test_mul_add_v2bf16_vss(ptr addrspace(1) %out, <2 x bfloat> %a, <2 x bfloat> inreg %b, <2 x bfloat> inreg %c) {
; GCN-LABEL: v_test_mul_add_v2bf16_vss:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_pk_fma_bf16 v2, v2, s0, s1
; GCN-NEXT:    global_store_b32 v[0:1], v2, off
; GCN-NEXT:    s_endpgm
  %mul = fmul contract <2 x bfloat> %a, %b
  %add = fadd contract <2 x bfloat> %mul, %c
  store <2 x bfloat> %add, ptr addrspace(1) %out
  ret void
}
414+
; Contracted fmul + fadd on <2 x bfloat> with all three operands inreg.
; The checks expect the addend to be copied from s2 into v2 first, so the
; v_pk_fma_bf16 reads only s0 and s1 as scalar sources.
; NOTE(review): presumably this reflects a limit on how many scalar registers
; one such instruction may read - confirm against the target's operand rules.
define amdgpu_ps void @v_test_mul_add_v2bf16_sss(ptr addrspace(1) %out, <2 x bfloat> inreg %a, <2 x bfloat> inreg %b, <2 x bfloat> inreg %c) {
; GCN-LABEL: v_test_mul_add_v2bf16_sss:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_mov_b32_e32 v2, s2
; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GCN-NEXT:    v_pk_fma_bf16 v2, s0, s1, v2
; GCN-NEXT:    global_store_b32 v[0:1], v2, off
; GCN-NEXT:    s_endpgm
  %mul = fmul contract <2 x bfloat> %a, %b
  %add = fadd contract <2 x bfloat> %mul, %c
  store <2 x bfloat> %add, ptr addrspace(1) %out
  ret void
}
428+
; Contracted fmul + fadd on <2 x bfloat> where the addend is the constant
; splat <0.5, 0.5>, %a is a plain argument and %b is inreg. The checks expect
; the constant to appear as the immediate operand 0.5 of v_pk_fma_bf16 with
; op_sel_hi:[1,1,0], rather than being materialized in a register.
define amdgpu_ps void @v_test_mul_add_v2bf16_vsc(ptr addrspace(1) %out, <2 x bfloat> %a, <2 x bfloat> inreg %b) {
; GCN-LABEL: v_test_mul_add_v2bf16_vsc:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_pk_fma_bf16 v2, v2, s0, 0.5 op_sel_hi:[1,1,0]
; GCN-NEXT:    global_store_b32 v[0:1], v2, off
; GCN-NEXT:    s_endpgm
  %mul = fmul contract <2 x bfloat> %a, %b
  %add = fadd contract <2 x bfloat> %mul, <bfloat 0.5, bfloat 0.5>
  store <2 x bfloat> %add, ptr addrspace(1) %out
  ret void
}
440+
; Contracted fmul + fadd on <2 x bfloat> where both the multiplier and the
; addend are non-splat constants: <1.0, 100.0> and <2.0, 200.0>.
; The checks expect the multiplier as the 32-bit literal 0x42c83f80
; (0x3f80 = 1.0 in the low half, 0x42c8 = 100.0 in the high half) and the
; addend 0x43484000 (0x4000 = 2.0, 0x4348 = 200.0) to be loaded into s0 first,
; so only one literal is encoded on the v_pk_fma_bf16 itself.
define amdgpu_ps void @v_test_mul_add_v2bf16_vll(ptr addrspace(1) %out, <2 x bfloat> %a) {
; GCN-LABEL: v_test_mul_add_v2bf16_vll:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_mov_b32 s0, 0x43484000
; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GCN-NEXT:    v_pk_fma_bf16 v2, 0x42c83f80, v2, s0
; GCN-NEXT:    global_store_b32 v[0:1], v2, off
; GCN-NEXT:    s_endpgm
  %mul = fmul contract <2 x bfloat> %a, <bfloat 1.0, bfloat 100.0>
  %add = fadd contract <2 x bfloat> %mul, <bfloat 2.0, bfloat 200.0>
  store <2 x bfloat> %add, ptr addrspace(1) %out
  ret void
}
454+
326455define amdgpu_ps void @v_test_fma_v2bf16_vvv (ptr addrspace (1 ) %out , <2 x bfloat> %a , <2 x bfloat> %b , <2 x bfloat> %c ) {
327456; GCN-LABEL: v_test_fma_v2bf16_vvv:
328457; GCN: ; %bb.0:
@@ -426,6 +555,8 @@ define amdgpu_ps void @llvm_exp2_bf16_s(ptr addrspace(1) %out, bfloat inreg %src
426555 ret void
427556}
428557
; Declarations for floating-point intrinsics called by tests in this file.
; The scalar bf16 min/max forms come first, then the packed <2 x bfloat>
; forms, then the packed fma.
declare bfloat @llvm.minnum.bf16(bfloat, bfloat)
declare bfloat @llvm.maxnum.bf16(bfloat, bfloat)
declare <2 x bfloat> @llvm.minnum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b)
declare <2 x bfloat> @llvm.maxnum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b)
declare <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat>, <2 x bfloat>, <2 x bfloat>)
0 commit comments