Skip to content

Commit 3c4fece

Browse files
authored
[Hexagon] Optimize sfclass/dfclass compares (llvm#165735)
The fclass intrinsics generate sub-optimal code: the predicate result is transferred to a general register and then compared against zero. This patch removes the transfer and the compare and uses the predicate directly.
1 parent 527e0ec commit 3c4fece

File tree

2 files changed

+99
-0
lines changed

2 files changed

+99
-0
lines changed

llvm/lib/Target/Hexagon/HexagonPatterns.td

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3434,6 +3434,19 @@ let AddedComplexity = 100 in {
34343434
(C2_not (S4_stored_locked I32:$Rs, I64:$Rt))>;
34353435
}
34363436

3437+
// Selection patterns for an fclass intrinsic whose result is compared
// against zero. Both patterns emit the classify instruction MI and use its
// result directly as the i1 value, instead of comparing the intrinsic's
// integer result with 0.
//   IntOp   - the intrinsic being matched.
//   MI      - the instruction emitted in its place.
//   RegPred - operand predicate for the floating-point source $Rs.
multiclass FloatClass<SDPatternOperator IntOp, InstHexagon MI,
3438+
PatFrag RegPred> {
3439+
// NOTE(review): AddedComplexity presumably makes these patterns win over the
// generic intrinsic-plus-compare selection - confirm against the other
// patterns in this file.
let AddedComplexity = 100 in {
3440+
// (IntOp Rs, #u5) == 0: emit MI and invert its result with C2_not.
def: Pat<(i1 (seteq (IntOp RegPred:$Rs, u5_0ImmPred_timm:$u5), 0)),
3441+
(C2_not (MI RegPred:$Rs, u5_0ImmPred_timm:$u5))>;
3442+
// (IntOp Rs, #u5) != 0: the result of MI is used as is.
def: Pat<(i1 (setne (IntOp RegPred:$Rs, u5_0ImmPred_timm:$u5), 0)),
3443+
(MI RegPred:$Rs, u5_0ImmPred_timm:$u5)>;
3444+
}
3445+
}
3446+
3447+
// Instantiate the compare patterns for the F32 (sfclass) and F64 (dfclass)
// forms of the intrinsic.
defm : FloatClass<int_hexagon_F2_sfclass, F2_sfclass, F32>;
3448+
defm : FloatClass<int_hexagon_F2_dfclass, F2_dfclass, F64>;
3449+
34373450
def: Pat<(int_hexagon_instrprof_custom (HexagonAtPcrel tglobaladdr:$addr), u32_0ImmPred:$I),
34383451
(PS_call_instrprof_custom tglobaladdr:$addr, imm:$I)>;
34393452

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
; Tests lowering of sfclass/dfclass compares.
2+
; Sub-optimal code generated without the patterns:
3+
; {
4+
; p0 = sfclass(r0,#16)
5+
; r0 = sfadd(r0,r0)
6+
; }
7+
; {
8+
; r2 = p0
9+
; }
10+
; {
11+
; if (p0.new) r0 = ##1065353216
12+
; p0 = cmp.eq(r2,#0)
13+
; jumpr r31
14+
; }
15+
; With the patterns added, we should be generating
16+
; {
17+
; p0 = sfclass(r0,#16)
18+
; r0 = sfadd(r0,r0)
19+
; }
20+
; {
21+
; if (!p0) r0 = ##1065353216
22+
; jumpr r31
23+
; }
24+
25+
; RUN: llc -march=hexagon -stop-after=hexagon-isel %s -o - | FileCheck %s
26+
27+
; CHECK: bb.0.entry1
28+
; CHECK: F2_sfclass
29+
; CHECK-NOT: C2_cmp
30+
; CHECK: C2_not
31+
; CHECK: F2_sfadd
32+
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
33+
; test1: returns 1.0 when sfclass(%x, 16) yields 0, otherwise %x + %x.
define float @test1(float noundef %x) {
34+
entry1:
35+
%0 = tail call i32 @llvm.hexagon.F2.sfclass(float %x, i32 16)
36+
%tobool.not = icmp eq i32 %0, 0
37+
%add = fadd float %x, %x
38+
%spec.select = select i1 %tobool.not, float 1.000000e+00, float %add
39+
ret float %spec.select
40+
}
41+
42+
; CHECK: bb.0.entry2
43+
; CHECK: F2_sfclass
44+
; CHECK-NOT: C2_cmp
45+
; CHECK: F2_sfadd
46+
; test2: same compare as test1 with the select operands swapped; returns
; %x + %x when sfclass(%x, 16) yields 0, otherwise 1.0.
define float @test2(float noundef %x) {
47+
entry2:
48+
%0 = tail call i32 @llvm.hexagon.F2.sfclass(float %x, i32 16)
49+
%tobool.not = icmp eq i32 %0, 0
50+
%add = fadd float %x, %x
51+
%spec.select = select i1 %tobool.not, float %add, float 1.000000e+00
52+
ret float %spec.select
53+
}
54+
55+
; CHECK: bb.0.entry3
56+
; CHECK: F2_dfclass
57+
; CHECK-NOT: C2_cmp
58+
; CHECK: C2_not
59+
; CHECK: F2_dfadd
60+
; test3: double-precision form of test1; returns 1.0 when dfclass(%x, 16)
; yields 0, otherwise %x + %x.
define double @test3(double noundef %x) {
61+
entry3:
62+
%0 = tail call i32 @llvm.hexagon.F2.dfclass(double %x, i32 16)
63+
%tobool.not = icmp eq i32 %0, 0
64+
%add = fadd double %x, %x
65+
%spec.select = select i1 %tobool.not, double 1.000000e+00, double %add
66+
ret double %spec.select
67+
}
68+
69+
; CHECK: bb.0.entry4
70+
; CHECK: F2_dfclass
71+
; CHECK-NOT: C2_cmp
72+
; CHECK: F2_dfadd
73+
; test4: double-precision form of test2; returns %x + %x when
; dfclass(%x, 16) yields 0, otherwise 1.0.
define double @test4(double noundef %x) {
74+
entry4:
75+
%0 = tail call i32 @llvm.hexagon.F2.dfclass(double %x, i32 16)
76+
%tobool.not = icmp eq i32 %0, 0
77+
%add = fadd double %x, %x
78+
%spec.select = select i1 %tobool.not, double %add, double 1.000000e+00
79+
ret double %spec.select
80+
}
81+
82+
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
83+
declare i32 @llvm.hexagon.F2.dfclass(double, i32 immarg)
84+
85+
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
86+
declare i32 @llvm.hexagon.F2.sfclass(float, i32 immarg)

0 commit comments

Comments
 (0)