@@ -410,19 +410,40 @@ define amdgpu_cs half @srcmods_neg_f16(half inreg %src) {
410410 ret half %result
411411}
412412
413- declare half @llvm.exp2.f16 (half )
414- declare float @llvm.exp2.f32 (float )
415- declare half @llvm.amdgcn.exp2.f16 (half )
416- declare float @llvm.amdgcn.exp2.f32 (float )
417- declare half @llvm.log2.f16 (half )
418- declare float @llvm.log2.f32 (float )
419- declare half @llvm.amdgcn.log.f16 (half )
420- declare float @llvm.amdgcn.log.f32 (float )
421- declare half @llvm.amdgcn.rcp.f16 (half )
422- declare float @llvm.amdgcn.rcp.f32 (float )
423- declare half @llvm.sqrt.f16 (half )
424- declare float @llvm.sqrt.f32 (float )
425- declare half @llvm.amdgcn.sqrt.f16 (half )
426- declare float @llvm.amdgcn.sqrt.f32 (float )
427- declare half @llvm.fabs.f16 (half )
428- declare float @llvm.fabs.f32 (float )
413+ ; TODO: SelectionDAG should avoid generating v_rcp_iflag_f32.
414+ define amdgpu_cs float @fdiv_f32_i32 (float inreg %a , i32 inreg %b ) {
415+ ; GFX12-SDAG-LABEL: fdiv_f32_i32:
416+ ; GFX12-SDAG: ; %bb.0:
417+ ; GFX12-SDAG-NEXT: s_cvt_f32_u32 s1, s1
418+ ; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(TRANS32_DEP_1)
419+ ; GFX12-SDAG-NEXT: v_rcp_iflag_f32_e32 v0, s1
420+ ; GFX12-SDAG-NEXT: v_mul_f32_e32 v0, s0, v0
421+ ; GFX12-SDAG-NEXT: ; return to shader part epilog
422+ ;
423+ ; GFX12-GISEL-LABEL: fdiv_f32_i32:
424+ ; GFX12-GISEL: ; %bb.0:
425+ ; GFX12-GISEL-NEXT: s_cvt_f32_u32 s1, s1
426+ ; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(TRANS32_DEP_1)
427+ ; GFX12-GISEL-NEXT: v_s_rcp_f32 s1, s1
428+ ; GFX12-GISEL-NEXT: s_mul_f32 s0, s0, s1
429+ ; GFX12-GISEL-NEXT: s_wait_alu 0xfffe
430+ ; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_2)
431+ ; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0
432+ ; GFX12-GISEL-NEXT: ; return to shader part epilog
; Divide a float by an unsigned i32 converted with uitofp, using the afn
; fast-math flag on the fdiv. The checks above expect a reciprocal followed by
; a multiply: v_rcp_iflag_f32 + v_mul_f32 under the SDAG prefix, and
; v_s_rcp_f32 + s_mul_f32 (then a copy of s0 into v0) under the GISEL prefix.
433+ %uint = uitofp i32 %b to float
434+ %result = fdiv afn float %a , %uint
435+ ret float %result
436+ }
437+
438+ define amdgpu_cs half @fdiv_f16_i16 (half inreg %a , i16 inreg %b ) {
439+ ; GFX12-LABEL: fdiv_f16_i16:
440+ ; GFX12: ; %bb.0:
441+ ; GFX12-NEXT: v_cvt_f16_u16_e32 v0, s1
442+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(TRANS32_DEP_1)
443+ ; GFX12-NEXT: v_rcp_f16_e32 v0, v0
444+ ; GFX12-NEXT: v_mul_f16_e32 v0, s0, v0
445+ ; GFX12-NEXT: ; return to shader part epilog
; Half-precision variant: divide a half by an unsigned i16 converted with
; uitofp, using the afn fast-math flag on the fdiv. The checks above expect
; v_cvt_f16_u16, then v_rcp_f16, then v_mul_f16.
; NOTE(review): the checks use the single GFX12 prefix, presumably because all
; RUN configurations produce the same output here; the RUN lines are outside
; this view, so confirm.
446+ %uint = uitofp i16 %b to half
447+ %result = fdiv afn half %a , %uint
448+ ret half %result
449+ }
0 commit comments