Handle division on halfs the same way as floats

KanclerzPiotr · igcbot · commit 31fe1d123d38 · 2025-05-12T14:23:40.000+02:00
The previous implementation used reciprocal multiplication: x/y = x * (1/y).
That approach does not always return 1.0 for x/x, which can propagate to
cases where floor(x/x) returns 0.
Now half division is handled the same way as float division, including its additional checks.
diff --git a/IGC/Compiler/LegalizationPass.cpp b/IGC/Compiler/LegalizationPass.cpp
@@ -2822,11 +2822,15 @@ bool IGC::expandFDIVInstructions(llvm::Function &F, ShaderType ShaderTy) {
                     V = Builder.CreateFMul(Y, X);
                 }
                 else {
-                    // Up cast to float, do rcp+mul in float, and down cast to half / bfloat.
+                    // Up cast to float, and down cast to half / bfloat.
+                    // div as float with additional checks for better precision and special cases like Inf, NaN. to be spec conformant.
                     Y = Builder.CreateFPExt(Y, Builder.getFloatTy());
-                    Y = Builder.CreateFDiv(ConstantFP::get(Ctx, APFloat(1.0f)), Y);
                     X = Builder.CreateFPExt(X, Builder.getFloatTy());
-                    V = Builder.CreateFMul(Y, X);
+                    V = Builder.CreateFDiv(X, Y);
+                    // Iterator at the begining of the loop is already at the next instruction,
+                    // so we want to set it back to handle this fdiv as normal one.
+                    Iter = BasicBlock::iterator(dyn_cast<Instruction>(V));
+
                     V = Builder.CreateFPTrunc(V, Inst->getType());
                 }
             }
diff --git a/IGC/Compiler/tests/GenFDIVEmulation/basic-typed-pointers.ll b/IGC/Compiler/tests/GenFDIVEmulation/basic-typed-pointers.ll
@@ -57,12 +57,28 @@ define void @test_fdiv_arcp(float %a, float %b) {
 
 define void @test_fdiv_half(half %a, half %b) {
 ; CHECK-LABEL: @test_fdiv_half(
-; CHECK:    [[TMP1:%[A-z0-9]*]] = fpext half [[B:%[A-z0-9]*]] to float
-; CHECK:    [[TMP2:%[A-z0-9]*]] = fdiv float 1.000000e+00, [[TMP1]]
-; CHECK:    [[TMP3:%[A-z0-9]*]] = fpext half [[A:%[A-z0-9]*]] to float
-; CHECK:    [[TMP4:%[A-z0-9]*]] = fmul float [[TMP2]], [[TMP3]]
-; CHECK:    [[TMP5:%[A-z0-9]*]] = fptrunc float [[TMP4]] to half
-; CHECK:    call void @use.f16(half [[TMP5]])
+; CHECK:    [[EXT1:%[A-z0-9]*]] = fpext half [[B:%[A-z0-9]*]] to float
+; CHECK:    [[EXT2:%[A-z0-9]*]] = fpext half [[A:%[A-z0-9]*]] to float
+; CHECK:    [[TMP1:%[A-z0-9]+]] = bitcast float [[B:%[A-z0-9]*]] to i32
+; CHECK:    [[TMP2:%[A-z0-9]+]] = and i32 [[TMP1]], 2139095040
+; CHECK:    [[TMP3:%[A-z0-9]+]] = icmp eq i32 [[TMP2]], 0
+; CHECK:    [[TMP4:%[A-z0-9]+]] = select i1 [[TMP3]], float 0x41F0000000000000, float 1.000000e+00
+; CHECK:    [[TMP5:%[A-z0-9]+]] = icmp uge i32 [[TMP2]], 1677721600
+; CHECK:    [[TMP6:%[A-z0-9]+]] = select i1 [[TMP5]], float 0x3DF0000000000000, float [[TMP4]]
+; CHECK:    [[TMP7:%[A-z0-9]+]] = fmul float [[B]], [[TMP6]]
+; CHECK:    [[TMP8:%[A-z0-9]+]] = fdiv float 1.000000e+00, [[TMP7]]
+; CHECK:    [[TMP9:%[A-z0-9]+]] = fmul float [[TMP8]], [[A:%[A-z0-9]*]]
+; CHECK:    [[TMP10:%[A-z0-9]+]] = fmul float [[TMP9]], [[TMP6]]
+; CHECK:    [[TMP11:%[A-z0-9]+]] = fcmp oeq float [[A]], [[B]]
+; CHECK:    [[TMP12:%[A-z0-9]+]] = and i32 [[TMP1]], 8388607
+; CHECK:    [[TMP13:%[A-z0-9]+]] = icmp eq i32 [[TMP2]], 0
+; CHECK:    [[TMP14:%[A-z0-9]+]] = icmp eq i32 [[TMP12]], 0
+; CHECK:    [[TMP15:%[A-z0-9]+]] = or i1 [[TMP13]], [[TMP14]]
+; CHECK:    [[TMP16:%[A-z0-9]+]] = xor i1 [[TMP15]], true
+; CHECK:    [[TMP17:%[A-z0-9]+]] = and i1 [[TMP11]], [[TMP16]]
+; CHECK:    [[TMP18:%[A-z0-9]+]] = select i1 [[TMP17]], float 1.000000e+00, float [[TMP10]]
+; CHECK:    [[TRUNC:%[A-z0-9]*]] = fptrunc float [[TMP18]] to half
+; CHECK:    call void @use.f16(half [[TRUNC]])
 ; CHECK:    ret void
 
   %1 = fdiv half %a, %b
diff --git a/IGC/Compiler/tests/GenFDIVEmulation/basic.ll b/IGC/Compiler/tests/GenFDIVEmulation/basic.ll
@@ -59,12 +59,28 @@ define void @test_fdiv_arcp(float %a, float %b) {
 
 define void @test_fdiv_half(half %a, half %b) {
 ; CHECK-LABEL: @test_fdiv_half(
-; CHECK:    [[TMP1:%[A-z0-9]*]] = fpext half [[B:%[A-z0-9]*]] to float
-; CHECK:    [[TMP2:%[A-z0-9]*]] = fdiv float 1.000000e+00, [[TMP1]]
-; CHECK:    [[TMP3:%[A-z0-9]*]] = fpext half [[A:%[A-z0-9]*]] to float
-; CHECK:    [[TMP4:%[A-z0-9]*]] = fmul float [[TMP2]], [[TMP3]]
-; CHECK:    [[TMP5:%[A-z0-9]*]] = fptrunc float [[TMP4]] to half
-; CHECK:    call void @use.f16(half [[TMP5]])
+; CHECK:    [[EXT1:%[A-z0-9]*]] = fpext half [[B:%[A-z0-9]*]] to float
+; CHECK:    [[EXT2:%[A-z0-9]*]] = fpext half [[A:%[A-z0-9]*]] to float
+; CHECK:    [[TMP1:%[A-z0-9]+]] = bitcast float [[B:%[A-z0-9]*]] to i32
+; CHECK:    [[TMP2:%[A-z0-9]+]] = and i32 [[TMP1]], 2139095040
+; CHECK:    [[TMP3:%[A-z0-9]+]] = icmp eq i32 [[TMP2]], 0
+; CHECK:    [[TMP4:%[A-z0-9]+]] = select i1 [[TMP3]], float 0x41F0000000000000, float 1.000000e+00
+; CHECK:    [[TMP5:%[A-z0-9]+]] = icmp uge i32 [[TMP2]], 1677721600
+; CHECK:    [[TMP6:%[A-z0-9]+]] = select i1 [[TMP5]], float 0x3DF0000000000000, float [[TMP4]]
+; CHECK:    [[TMP7:%[A-z0-9]+]] = fmul float [[B]], [[TMP6]]
+; CHECK:    [[TMP8:%[A-z0-9]+]] = fdiv float 1.000000e+00, [[TMP7]]
+; CHECK:    [[TMP9:%[A-z0-9]+]] = fmul float [[TMP8]], [[A:%[A-z0-9]*]]
+; CHECK:    [[TMP10:%[A-z0-9]+]] = fmul float [[TMP9]], [[TMP6]]
+; CHECK:    [[TMP11:%[A-z0-9]+]] = fcmp oeq float [[A]], [[B]]
+; CHECK:    [[TMP12:%[A-z0-9]+]] = and i32 [[TMP1]], 8388607
+; CHECK:    [[TMP13:%[A-z0-9]+]] = icmp eq i32 [[TMP2]], 0
+; CHECK:    [[TMP14:%[A-z0-9]+]] = icmp eq i32 [[TMP12]], 0
+; CHECK:    [[TMP15:%[A-z0-9]+]] = or i1 [[TMP13]], [[TMP14]]
+; CHECK:    [[TMP16:%[A-z0-9]+]] = xor i1 [[TMP15]], true
+; CHECK:    [[TMP17:%[A-z0-9]+]] = and i1 [[TMP11]], [[TMP16]]
+; CHECK:    [[TMP18:%[A-z0-9]+]] = select i1 [[TMP17]], float 1.000000e+00, float [[TMP10]]
+; CHECK:    [[TRUNC:%[A-z0-9]*]] = fptrunc float [[TMP18]] to half
+; CHECK:    call void @use.f16(half [[TRUNC]])
 ; CHECK:    ret void
 
   %1 = fdiv half %a, %b
diff --git a/IGC/ocloc_tests/features/bfloat/test_bfloat.ll b/IGC/ocloc_tests/features/bfloat/test_bfloat.ll
@@ -130,11 +130,25 @@ define spir_kernel void @test_fdiv(
 entry:
   %res = fdiv bfloat %b1, %b2
   store bfloat %res, bfloat addrspace(1)* %out1, align 2
-; fdiv is expanded to float inv+mul
-; CHECK-VISA-DAG: inv {{.*}} [[RESINV:.*]](0,0)<1> {{.*}}<0;1,0>
-; CHECK-VISA-DAG: mul {{.*}} [[RESMUL:.*]](0,0)<1> [[RESINV]](0,0)<0;1,0> [[SRC1:.*]](0,0)<0;1,0>
-; CHECK-VISA-DAG: mov (M1_NM, 1) [[RES:.*]](0,0)<1> [[RESMUL]](0,0)<0;1,0>
-; CHECK-VISA-DAG: .decl [[RES]] {{.*}} type=bf {{.*}}
+; CHECK-VISA-DAG: and {{.*}} [[YExp:.*]](0,0)<1> [[Y_asInt:.*]](0,0)<0;1,0> 0x7f800000:d
+; CHECK-VISA-DAG: cmp.eq {{.*}} [[P1:.*]] [[YExp]](0,0)<0;1,0> 0x0:d
+; CHECK-VISA-DAG: ([[P1]]) sel {{.*}} [[ScaleUp:.*]](0,0)<1> 0x4f800000:f 0x3f800000:f
+; CHECK-VISA-DAG: cmp.ge {{.*}} [[P2:.*]] {{.*}} 0x64000000:ud
+; CHECK-VISA-DAG: ([[P2]]) sel {{.*}} [[Scale:.*]](0,0)<1> 0x2f800000:f [[ScaleUp]](0,0)<0;1,0>
+; CHECK-VISA-DAG: mul {{.*}} [[ScaledY:.*]](0,0)<1> [[Y:.*]](0,0)<0;1,0> [[Scale]](0,0)<0;1,0>
+; CHECK-VISA-DAG: inv {{.*}} [[ResInv:.*]](0,0)<1> [[ScaledY]](0,0)<0;1,0>
+; CHECK-VISA-DAG: mul {{.*}} [[TMP:.*]](0,0)<1> [[ResInv]](0,0)<0;1,0> [[X:.*]](0,0)<0;1,0>
+; CHECK-VISA-DAG: mul {{.*}} [[ResMul:.*]](0,0)<1> [[TMP]](0,0)<0;1,0> [[Scale]](0,0)<0;1,0>
+; CHECK-VISA-DAG: and {{.*}} [[YMantisa:.*]](0,0)<1> [[Y_asInt]](0,0)<0;1,0> 0x7fffff:d
+; CHECK-VISA-DAG: cmp.eq {{.*}} [[P3:.*]] [[YMantisa]](0,0)<0;1,0> 0x0:d
+; CHECK-VISA-DAG: cmp.eq {{.*}} [[P4:.*]] [[YExp]](0,0)<0;1,0> 0x0:d
+; CHECK-VISA-DAG: or {{.*}} [[IsZeroOrSubnormal:.*]] [[P4]] [[P3]]
+; CHECK-VISA-DAG: not {{.*}} [[IsNotZeroOrSubnormal:.*]] [[IsZeroOrSubnormal]]
+; CHECK-VISA-DAG: cmp.eq {{.*}} [[IsEqual:.*]] [[X]](0,0)<0;1,0> [[Y]](0,0)<0;1,0>
+; CHECK-VISA-DAG: and {{.*}} [[IsNotProperValue:.*]] [[IsEqual]] [[IsNotZeroOrSubnormal]]
+; CHECK-VISA-DAG: ([[IsNotProperValue]]) sel {{.*}} [[Res:.*]](0,0)<1> 0x3f800000:f  [[ResMul]]
+; CHECK-VISA-DAG: mov (M1_NM, 1) [[StoreRes:.*]](0,0)<1> [[Res]](0,0)<0;1,0>
+; CHECK-VISA-DAG: .decl [[StoreRes]] {{.*}} type=bf {{.*}}
   %res2 = fdiv <2 x bfloat> %b1_2, %b2_2
   store <2 x bfloat> %res2, <2 x bfloat> addrspace(1)* %out2, align 4
   %res4 = fdiv <4 x bfloat> %b1_4, %b2_4

Original file line number	Diff line number	Diff line change
`@@ -2822,11 +2822,15 @@ bool IGC::expandFDIVInstructions(llvm::Function &F, ShaderType ShaderTy) {`
`2822`	`2822`	`V = Builder.CreateFMul(Y, X);`
`2823`	`2823`	`}`
`2824`	`2824`	`else {`
`2825`		`- // Up cast to float, do rcp+mul in float, and down cast to half / bfloat.`
	`2825`	`+ // Up cast to float, and down cast to half / bfloat.`
	`2826`	`+ // div as float with additional checks for better precision and special cases like Inf, NaN. to be spec conformant.`
`2826`	`2827`	`Y = Builder.CreateFPExt(Y, Builder.getFloatTy());`
`2827`		`- Y = Builder.CreateFDiv(ConstantFP::get(Ctx, APFloat(1.0f)), Y);`
`2828`	`2828`	`X = Builder.CreateFPExt(X, Builder.getFloatTy());`
`2829`		`- V = Builder.CreateFMul(Y, X);`
	`2829`	`+ V = Builder.CreateFDiv(X, Y);`
	`2830`	`+ // Iterator at the begining of the loop is already at the next instruction,`
	`2831`	`+ // so we want to set it back to handle this fdiv as normal one.`
	`2832`	`+ Iter = BasicBlock::iterator(dyn_cast<Instruction>(V));`
	`2833`	`+`
`2830`	`2834`	`V = Builder.CreateFPTrunc(V, Inst->getType());`
`2831`	`2835`	`}`
`2832`	`2836`	`}`