Skip to content

Commit 31fe1d1

Browse files
KanclerzPiotr authored and igcbot committed
Handle division on halfs the same way as floats
The previous implementation used reciprocal multiplication: x/y = x * (1/y). That approach does not always return 1.0 for x/x, which can propagate to cases where floor(x/x) returns 0. Half division now behaves the same way as float division, with additional checks.
1 parent 9ea5948 commit 31fe1d1

File tree

4 files changed

+70
-20
lines changed

4 files changed

+70
-20
lines changed

IGC/Compiler/LegalizationPass.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2822,11 +2822,15 @@ bool IGC::expandFDIVInstructions(llvm::Function &F, ShaderType ShaderTy) {
28222822
V = Builder.CreateFMul(Y, X);
28232823
}
28242824
else {
2825-
// Up cast to float, do rcp+mul in float, and down cast to half / bfloat.
2825+
// Up cast to float, and down cast to half / bfloat.
2826+
// Divide as float, with additional checks for better precision and for special cases such as Inf and NaN, to be spec conformant.
28262827
Y = Builder.CreateFPExt(Y, Builder.getFloatTy());
2827-
Y = Builder.CreateFDiv(ConstantFP::get(Ctx, APFloat(1.0f)), Y);
28282828
X = Builder.CreateFPExt(X, Builder.getFloatTy());
2829-
V = Builder.CreateFMul(Y, X);
2829+
V = Builder.CreateFDiv(X, Y);
2830+
// Iterator at the beginning of the loop is already at the next instruction,
2831+
// so we want to set it back to handle this fdiv as normal one.
2832+
Iter = BasicBlock::iterator(dyn_cast<Instruction>(V));
2833+
28302834
V = Builder.CreateFPTrunc(V, Inst->getType());
28312835
}
28322836
}

IGC/Compiler/tests/GenFDIVEmulation/basic-typed-pointers.ll

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -57,12 +57,28 @@ define void @test_fdiv_arcp(float %a, float %b) {
5757

5858
define void @test_fdiv_half(half %a, half %b) {
5959
; CHECK-LABEL: @test_fdiv_half(
60-
; CHECK: [[TMP1:%[A-z0-9]*]] = fpext half [[B:%[A-z0-9]*]] to float
61-
; CHECK: [[TMP2:%[A-z0-9]*]] = fdiv float 1.000000e+00, [[TMP1]]
62-
; CHECK: [[TMP3:%[A-z0-9]*]] = fpext half [[A:%[A-z0-9]*]] to float
63-
; CHECK: [[TMP4:%[A-z0-9]*]] = fmul float [[TMP2]], [[TMP3]]
64-
; CHECK: [[TMP5:%[A-z0-9]*]] = fptrunc float [[TMP4]] to half
65-
; CHECK: call void @use.f16(half [[TMP5]])
60+
; CHECK: [[EXT1:%[A-z0-9]*]] = fpext half [[B:%[A-z0-9]*]] to float
61+
; CHECK: [[EXT2:%[A-z0-9]*]] = fpext half [[A:%[A-z0-9]*]] to float
62+
; CHECK: [[TMP1:%[A-z0-9]+]] = bitcast float [[B:%[A-z0-9]*]] to i32
63+
; CHECK: [[TMP2:%[A-z0-9]+]] = and i32 [[TMP1]], 2139095040
64+
; CHECK: [[TMP3:%[A-z0-9]+]] = icmp eq i32 [[TMP2]], 0
65+
; CHECK: [[TMP4:%[A-z0-9]+]] = select i1 [[TMP3]], float 0x41F0000000000000, float 1.000000e+00
66+
; CHECK: [[TMP5:%[A-z0-9]+]] = icmp uge i32 [[TMP2]], 1677721600
67+
; CHECK: [[TMP6:%[A-z0-9]+]] = select i1 [[TMP5]], float 0x3DF0000000000000, float [[TMP4]]
68+
; CHECK: [[TMP7:%[A-z0-9]+]] = fmul float [[B]], [[TMP6]]
69+
; CHECK: [[TMP8:%[A-z0-9]+]] = fdiv float 1.000000e+00, [[TMP7]]
70+
; CHECK: [[TMP9:%[A-z0-9]+]] = fmul float [[TMP8]], [[A:%[A-z0-9]*]]
71+
; CHECK: [[TMP10:%[A-z0-9]+]] = fmul float [[TMP9]], [[TMP6]]
72+
; CHECK: [[TMP11:%[A-z0-9]+]] = fcmp oeq float [[A]], [[B]]
73+
; CHECK: [[TMP12:%[A-z0-9]+]] = and i32 [[TMP1]], 8388607
74+
; CHECK: [[TMP13:%[A-z0-9]+]] = icmp eq i32 [[TMP2]], 0
75+
; CHECK: [[TMP14:%[A-z0-9]+]] = icmp eq i32 [[TMP12]], 0
76+
; CHECK: [[TMP15:%[A-z0-9]+]] = or i1 [[TMP13]], [[TMP14]]
77+
; CHECK: [[TMP16:%[A-z0-9]+]] = xor i1 [[TMP15]], true
78+
; CHECK: [[TMP17:%[A-z0-9]+]] = and i1 [[TMP11]], [[TMP16]]
79+
; CHECK: [[TMP18:%[A-z0-9]+]] = select i1 [[TMP17]], float 1.000000e+00, float [[TMP10]]
80+
; CHECK: [[TRUNC:%[A-z0-9]*]] = fptrunc float [[TMP18]] to half
81+
; CHECK: call void @use.f16(half [[TRUNC]])
6682
; CHECK: ret void
6783

6884
%1 = fdiv half %a, %b

IGC/Compiler/tests/GenFDIVEmulation/basic.ll

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -59,12 +59,28 @@ define void @test_fdiv_arcp(float %a, float %b) {
5959

6060
define void @test_fdiv_half(half %a, half %b) {
6161
; CHECK-LABEL: @test_fdiv_half(
62-
; CHECK: [[TMP1:%[A-z0-9]*]] = fpext half [[B:%[A-z0-9]*]] to float
63-
; CHECK: [[TMP2:%[A-z0-9]*]] = fdiv float 1.000000e+00, [[TMP1]]
64-
; CHECK: [[TMP3:%[A-z0-9]*]] = fpext half [[A:%[A-z0-9]*]] to float
65-
; CHECK: [[TMP4:%[A-z0-9]*]] = fmul float [[TMP2]], [[TMP3]]
66-
; CHECK: [[TMP5:%[A-z0-9]*]] = fptrunc float [[TMP4]] to half
67-
; CHECK: call void @use.f16(half [[TMP5]])
62+
; CHECK: [[EXT1:%[A-z0-9]*]] = fpext half [[B:%[A-z0-9]*]] to float
63+
; CHECK: [[EXT2:%[A-z0-9]*]] = fpext half [[A:%[A-z0-9]*]] to float
64+
; CHECK: [[TMP1:%[A-z0-9]+]] = bitcast float [[B:%[A-z0-9]*]] to i32
65+
; CHECK: [[TMP2:%[A-z0-9]+]] = and i32 [[TMP1]], 2139095040
66+
; CHECK: [[TMP3:%[A-z0-9]+]] = icmp eq i32 [[TMP2]], 0
67+
; CHECK: [[TMP4:%[A-z0-9]+]] = select i1 [[TMP3]], float 0x41F0000000000000, float 1.000000e+00
68+
; CHECK: [[TMP5:%[A-z0-9]+]] = icmp uge i32 [[TMP2]], 1677721600
69+
; CHECK: [[TMP6:%[A-z0-9]+]] = select i1 [[TMP5]], float 0x3DF0000000000000, float [[TMP4]]
70+
; CHECK: [[TMP7:%[A-z0-9]+]] = fmul float [[B]], [[TMP6]]
71+
; CHECK: [[TMP8:%[A-z0-9]+]] = fdiv float 1.000000e+00, [[TMP7]]
72+
; CHECK: [[TMP9:%[A-z0-9]+]] = fmul float [[TMP8]], [[A:%[A-z0-9]*]]
73+
; CHECK: [[TMP10:%[A-z0-9]+]] = fmul float [[TMP9]], [[TMP6]]
74+
; CHECK: [[TMP11:%[A-z0-9]+]] = fcmp oeq float [[A]], [[B]]
75+
; CHECK: [[TMP12:%[A-z0-9]+]] = and i32 [[TMP1]], 8388607
76+
; CHECK: [[TMP13:%[A-z0-9]+]] = icmp eq i32 [[TMP2]], 0
77+
; CHECK: [[TMP14:%[A-z0-9]+]] = icmp eq i32 [[TMP12]], 0
78+
; CHECK: [[TMP15:%[A-z0-9]+]] = or i1 [[TMP13]], [[TMP14]]
79+
; CHECK: [[TMP16:%[A-z0-9]+]] = xor i1 [[TMP15]], true
80+
; CHECK: [[TMP17:%[A-z0-9]+]] = and i1 [[TMP11]], [[TMP16]]
81+
; CHECK: [[TMP18:%[A-z0-9]+]] = select i1 [[TMP17]], float 1.000000e+00, float [[TMP10]]
82+
; CHECK: [[TRUNC:%[A-z0-9]*]] = fptrunc float [[TMP18]] to half
83+
; CHECK: call void @use.f16(half [[TRUNC]])
6884
; CHECK: ret void
6985

7086
%1 = fdiv half %a, %b

IGC/ocloc_tests/features/bfloat/test_bfloat.ll

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -130,11 +130,25 @@ define spir_kernel void @test_fdiv(
130130
entry:
131131
%res = fdiv bfloat %b1, %b2
132132
store bfloat %res, bfloat addrspace(1)* %out1, align 2
133-
; fdiv is expanded to float inv+mul
134-
; CHECK-VISA-DAG: inv {{.*}} [[RESINV:.*]](0,0)<1> {{.*}}<0;1,0>
135-
; CHECK-VISA-DAG: mul {{.*}} [[RESMUL:.*]](0,0)<1> [[RESINV]](0,0)<0;1,0> [[SRC1:.*]](0,0)<0;1,0>
136-
; CHECK-VISA-DAG: mov (M1_NM, 1) [[RES:.*]](0,0)<1> [[RESMUL]](0,0)<0;1,0>
137-
; CHECK-VISA-DAG: .decl [[RES]] {{.*}} type=bf {{.*}}
133+
; CHECK-VISA-DAG: and {{.*}} [[YExp:.*]](0,0)<1> [[Y_asInt:.*]](0,0)<0;1,0> 0x7f800000:d
134+
; CHECK-VISA-DAG: cmp.eq {{.*}} [[P1:.*]] [[YExp]](0,0)<0;1,0> 0x0:d
135+
; CHECK-VISA-DAG: ([[P1]]) sel {{.*}} [[ScaleUp:.*]](0,0)<1> 0x4f800000:f 0x3f800000:f
136+
; CHECK-VISA-DAG: cmp.ge {{.*}} [[P2:.*]] {{.*}} 0x64000000:ud
137+
; CHECK-VISA-DAG: ([[P2]]) sel {{.*}} [[Scale:.*]](0,0)<1> 0x2f800000:f [[ScaleUp]](0,0)<0;1,0>
138+
; CHECK-VISA-DAG: mul {{.*}} [[ScaledY:.*]](0,0)<1> [[Y:.*]](0,0)<0;1,0> [[Scale]](0,0)<0;1,0>
139+
; CHECK-VISA-DAG: inv {{.*}} [[ResInv:.*]](0,0)<1> [[ScaledY]](0,0)<0;1,0>
140+
; CHECK-VISA-DAG: mul {{.*}} [[TMP:.*]](0,0)<1> [[ResInv]](0,0)<0;1,0> [[X:.*]](0,0)<0;1,0>
141+
; CHECK-VISA-DAG: mul {{.*}} [[ResMul:.*]](0,0)<1> [[TMP]](0,0)<0;1,0> [[Scale]](0,0)<0;1,0>
142+
; CHECK-VISA-DAG: and {{.*}} [[YMantisa:.*]](0,0)<1> [[Y_asInt]](0,0)<0;1,0> 0x7fffff:d
143+
; CHECK-VISA-DAG: cmp.eq {{.*}} [[P3:.*]] [[YMantisa]](0,0)<0;1,0> 0x0:d
144+
; CHECK-VISA-DAG: cmp.eq {{.*}} [[P4:.*]] [[YExp]](0,0)<0;1,0> 0x0:d
145+
; CHECK-VISA-DAG: or {{.*}} [[IsZeroOrSubnormal:.*]] [[P4]] [[P3]]
146+
; CHECK-VISA-DAG: not {{.*}} [[IsNotZeroOrSubnormal:.*]] [[IsZeroOrSubnormal]]
147+
; CHECK-VISA-DAG: cmp.eq {{.*}} [[IsEqual:.*]] [[X]](0,0)<0;1,0> [[Y]](0,0)<0;1,0>
148+
; CHECK-VISA-DAG: and {{.*}} [[IsNotProperValue:.*]] [[IsEqual]] [[IsNotZeroOrSubnormal]]
149+
; CHECK-VISA-DAG: ([[IsNotProperValue]]) sel {{.*}} [[Res:.*]](0,0)<1> 0x3f800000:f [[ResMul]]
150+
; CHECK-VISA-DAG: mov (M1_NM, 1) [[StoreRes:.*]](0,0)<1> [[Res]](0,0)<0;1,0>
151+
; CHECK-VISA-DAG: .decl [[StoreRes]] {{.*}} type=bf {{.*}}
138152
%res2 = fdiv <2 x bfloat> %b1_2, %b2_2
139153
store <2 x bfloat> %res2, <2 x bfloat> addrspace(1)* %out2, align 4
140154
%res4 = fdiv <4 x bfloat> %b1_4, %b2_4

0 commit comments

Comments
 (0)