Skip to content

Commit a62f72f

Browse files
sophie-zhao authored and abner-chenc committed
cmd/compile/internal/ssa: optimise more branches with SGTconst/SGTUconst on loong64
Add branches to convert EQZ/NEZ into more optimal branch conditions.

This reduces 720 instructions from the go toolchain binary on loong64.

file         before     after      Δ      %
asm          555306     555082     -224   -0.0403%
cgo          481814     481742     -72    -0.0149%
compile      2475686    2475710    +24    +0.0010%
cover        516854     516770     -84    -0.0163%
link         702566     702530     -36    -0.0051%
preprofile   238612     238548     -64    -0.0268%
vet          793140     793060     -80    -0.0101%
go           1573466    1573346    -120   -0.0076%
gofmt        320560     320496     -64    -0.0200%
total        7658004    7657284    -720   -0.0094%

Additionally, rename EQ/NE to EQZ/NEZ to enhance readability.

Change-Id: Ibc876bc8b8d4e81d5c3aaf0b74b60419f3c771b1
Reviewed-on: https://go-review.googlesource.com/c/go/+/693455
Reviewed-by: abner chenc <[email protected]>
LUCI-TryBot-Result: Go LUCI <[email protected]>
Reviewed-by: Dmitri Shuralyov <[email protected]>
Reviewed-by: Keith Randall <[email protected]>
1 parent fbac94a commit a62f72f

File tree

5 files changed

+157
-108
lines changed

5 files changed

+157
-108
lines changed

src/cmd/compile/internal/loong64/ssa.go

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1075,8 +1075,8 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
10751075
var blockJump = map[ssa.BlockKind]struct {
10761076
asm, invasm obj.As
10771077
}{
1078-
ssa.BlockLOONG64EQ: {loong64.ABEQ, loong64.ABNE},
1079-
ssa.BlockLOONG64NE: {loong64.ABNE, loong64.ABEQ},
1078+
ssa.BlockLOONG64EQZ: {loong64.ABEQ, loong64.ABNE},
1079+
ssa.BlockLOONG64NEZ: {loong64.ABNE, loong64.ABEQ},
10801080
ssa.BlockLOONG64LTZ: {loong64.ABLTZ, loong64.ABGEZ},
10811081
ssa.BlockLOONG64GEZ: {loong64.ABGEZ, loong64.ABLTZ},
10821082
ssa.BlockLOONG64LEZ: {loong64.ABLEZ, loong64.ABGTZ},
@@ -1102,7 +1102,7 @@ func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
11021102
case ssa.BlockExit, ssa.BlockRetJmp:
11031103
case ssa.BlockRet:
11041104
s.Prog(obj.ARET)
1105-
case ssa.BlockLOONG64EQ, ssa.BlockLOONG64NE,
1105+
case ssa.BlockLOONG64EQZ, ssa.BlockLOONG64NEZ,
11061106
ssa.BlockLOONG64LTZ, ssa.BlockLOONG64GEZ,
11071107
ssa.BlockLOONG64LEZ, ssa.BlockLOONG64GTZ,
11081108
ssa.BlockLOONG64BEQ, ssa.BlockLOONG64BNE,
@@ -1132,7 +1132,7 @@ func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
11321132
p.From.Type = obj.TYPE_REG
11331133
p.From.Reg = b.Controls[0].Reg()
11341134
p.Reg = b.Controls[1].Reg()
1135-
case ssa.BlockLOONG64EQ, ssa.BlockLOONG64NE,
1135+
case ssa.BlockLOONG64EQZ, ssa.BlockLOONG64NEZ,
11361136
ssa.BlockLOONG64LTZ, ssa.BlockLOONG64GEZ,
11371137
ssa.BlockLOONG64LEZ, ssa.BlockLOONG64GTZ,
11381138
ssa.BlockLOONG64FPT, ssa.BlockLOONG64FPF:

src/cmd/compile/internal/ssa/_gen/LOONG64.rules

Lines changed: 39 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -517,7 +517,7 @@
517517
(GetCallerSP ...) => (LoweredGetCallerSP ...)
518518
(GetCallerPC ...) => (LoweredGetCallerPC ...)
519519

520-
(If cond yes no) => (NE (MOVBUreg <typ.UInt64> cond) yes no)
520+
(If cond yes no) => (NEZ (MOVBUreg <typ.UInt64> cond) yes no)
521521
(MOVBUreg x:((SGT|SGTU) _ _)) => x
522522
(MOVBUreg x:(XOR (MOVVconst [1]) ((SGT|SGTU) _ _))) => x
523523

@@ -902,41 +902,46 @@
902902
// Optimizations
903903

904904
// Absorb boolean tests into block
905-
(NE (FPFlagTrue cmp) yes no) => (FPT cmp yes no)
906-
(NE (FPFlagFalse cmp) yes no) => (FPF cmp yes no)
907-
(EQ (FPFlagTrue cmp) yes no) => (FPF cmp yes no)
908-
(EQ (FPFlagFalse cmp) yes no) => (FPT cmp yes no)
909-
(NE (XORconst [1] cmp:(SGT _ _)) yes no) => (EQ cmp yes no)
910-
(NE (XORconst [1] cmp:(SGTU _ _)) yes no) => (EQ cmp yes no)
911-
(NE (XORconst [1] cmp:(SGTconst _)) yes no) => (EQ cmp yes no)
912-
(NE (XORconst [1] cmp:(SGTUconst _)) yes no) => (EQ cmp yes no)
913-
(EQ (XORconst [1] cmp:(SGT _ _)) yes no) => (NE cmp yes no)
914-
(EQ (XORconst [1] cmp:(SGTU _ _)) yes no) => (NE cmp yes no)
915-
(EQ (XORconst [1] cmp:(SGTconst _)) yes no) => (NE cmp yes no)
916-
(EQ (XORconst [1] cmp:(SGTUconst _)) yes no) => (NE cmp yes no)
917-
(NE (SGTUconst [1] x) yes no) => (EQ x yes no)
918-
(EQ (SGTUconst [1] x) yes no) => (NE x yes no)
919-
(NE (SGTU x (MOVVconst [0])) yes no) => (NE x yes no)
920-
(EQ (SGTU x (MOVVconst [0])) yes no) => (EQ x yes no)
921-
(NE (SGTconst [0] x) yes no) => (LTZ x yes no)
922-
(EQ (SGTconst [0] x) yes no) => (GEZ x yes no)
923-
(NE (SGT x (MOVVconst [0])) yes no) => (GTZ x yes no)
924-
(EQ (SGT x (MOVVconst [0])) yes no) => (LEZ x yes no)
925-
926-
(EQ (SGTU (MOVVconst [c]) y) yes no) && c >= -2048 && c <= 2047 => (EQ (SGTUconst [c] y) yes no)
927-
(NE (SGTU (MOVVconst [c]) y) yes no) && c >= -2048 && c <= 2047 => (NE (SGTUconst [c] y) yes no)
928-
(EQ (SUBV x y) yes no) => (BEQ x y yes no)
929-
(NE (SUBV x y) yes no) => (BNE x y yes no)
930-
(EQ (SGT x y) yes no) => (BGE y x yes no)
931-
(NE (SGT x y) yes no) => (BLT y x yes no)
932-
(EQ (SGTU x y) yes no) => (BGEU y x yes no)
933-
(NE (SGTU x y) yes no) => (BLTU y x yes no)
905+
(NEZ (FPFlagTrue cmp) yes no) => (FPT cmp yes no)
906+
(NEZ (FPFlagFalse cmp) yes no) => (FPF cmp yes no)
907+
(EQZ (FPFlagTrue cmp) yes no) => (FPF cmp yes no)
908+
(EQZ (FPFlagFalse cmp) yes no) => (FPT cmp yes no)
909+
(NEZ (XORconst [1] cmp:(SGT _ _)) yes no) => (EQZ cmp yes no)
910+
(NEZ (XORconst [1] cmp:(SGTU _ _)) yes no) => (EQZ cmp yes no)
911+
(NEZ (XORconst [1] cmp:(SGTconst _)) yes no) => (EQZ cmp yes no)
912+
(NEZ (XORconst [1] cmp:(SGTUconst _)) yes no) => (EQZ cmp yes no)
913+
(EQZ (XORconst [1] cmp:(SGT _ _)) yes no) => (NEZ cmp yes no)
914+
(EQZ (XORconst [1] cmp:(SGTU _ _)) yes no) => (NEZ cmp yes no)
915+
(EQZ (XORconst [1] cmp:(SGTconst _)) yes no) => (NEZ cmp yes no)
916+
(EQZ (XORconst [1] cmp:(SGTUconst _)) yes no) => (NEZ cmp yes no)
917+
(NEZ (SGTUconst [1] x) yes no) => (EQZ x yes no)
918+
(EQZ (SGTUconst [1] x) yes no) => (NEZ x yes no)
919+
(NEZ (SGTU x (MOVVconst [0])) yes no) => (NEZ x yes no)
920+
(EQZ (SGTU x (MOVVconst [0])) yes no) => (EQZ x yes no)
921+
(NEZ (SGTconst [0] x) yes no) => (LTZ x yes no)
922+
(EQZ (SGTconst [0] x) yes no) => (GEZ x yes no)
923+
(NEZ (SGT x (MOVVconst [0])) yes no) => (GTZ x yes no)
924+
(EQZ (SGT x (MOVVconst [0])) yes no) => (LEZ x yes no)
925+
926+
// Convert EQZ/NEZ into more optimal branch conditions.
927+
(EQZ (SGTU (MOVVconst [c]) y) yes no) && c >= -2048 && c <= 2047 => (EQZ (SGTUconst [c] y) yes no)
928+
(NEZ (SGTU (MOVVconst [c]) y) yes no) && c >= -2048 && c <= 2047 => (NEZ (SGTUconst [c] y) yes no)
929+
(EQZ (SUBV x y) yes no) => (BEQ x y yes no)
930+
(NEZ (SUBV x y) yes no) => (BNE x y yes no)
931+
(EQZ (SGT x y) yes no) => (BGE y x yes no)
932+
(NEZ (SGT x y) yes no) => (BLT y x yes no)
933+
(EQZ (SGTU x y) yes no) => (BGEU y x yes no)
934+
(NEZ (SGTU x y) yes no) => (BLTU y x yes no)
935+
(EQZ (SGTconst [c] y) yes no) => (BGE y (MOVVconst [c]) yes no)
936+
(NEZ (SGTconst [c] y) yes no) => (BLT y (MOVVconst [c]) yes no)
937+
(EQZ (SGTUconst [c] y) yes no) => (BGEU y (MOVVconst [c]) yes no)
938+
(NEZ (SGTUconst [c] y) yes no) => (BLTU y (MOVVconst [c]) yes no)
934939

935940
// absorb constants into branches
936-
(EQ (MOVVconst [0]) yes no) => (First yes no)
937-
(EQ (MOVVconst [c]) yes no) && c != 0 => (First no yes)
938-
(NE (MOVVconst [0]) yes no) => (First no yes)
939-
(NE (MOVVconst [c]) yes no) && c != 0 => (First yes no)
941+
(EQZ (MOVVconst [0]) yes no) => (First yes no)
942+
(EQZ (MOVVconst [c]) yes no) && c != 0 => (First no yes)
943+
(NEZ (MOVVconst [0]) yes no) => (First no yes)
944+
(NEZ (MOVVconst [c]) yes no) && c != 0 => (First yes no)
940945
(LTZ (MOVVconst [c]) yes no) && c < 0 => (First yes no)
941946
(LTZ (MOVVconst [c]) yes no) && c >= 0 => (First no yes)
942947
(LEZ (MOVVconst [c]) yes no) && c <= 0 => (First yes no)

src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -580,8 +580,8 @@ func init() {
580580
}
581581

582582
blocks := []blockData{
583-
{name: "EQ", controls: 1},
584-
{name: "NE", controls: 1},
583+
{name: "EQZ", controls: 1}, // = 0
584+
{name: "NEZ", controls: 1}, // != 0
585585
{name: "LTZ", controls: 1}, // < 0
586586
{name: "LEZ", controls: 1}, // <= 0
587587
{name: "GTZ", controls: 1}, // > 0

src/cmd/compile/internal/ssa/opGen.go

Lines changed: 4 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)