Skip to content

Commit 90b7d7a

Browse files
limeidan authored and abner-chenc committed
cmd/compile/internal: optimize multiplication using the new operation 'ADDshiftLLV' on loong64
goos: linux
goarch: loong64
pkg: cmd/compile/internal/test
cpu: Loongson-3A6000-HV @ 2500.00MHz
                   │     old      │                 new                 │
                   │    sec/op    │    sec/op     vs base               │
MulconstI32/3        0.8004n ± 0%   0.4247n ± 2%  -46.94% (p=0.000 n=10)
MulconstI32/5        0.8005n ± 0%   0.4256n ± 1%  -46.83% (p=0.000 n=10)
MulconstI32/12       1.2010n ± 0%   0.8005n ± 0%  -33.35% (p=0.000 n=10)
MulconstI32/120      0.8090n ± 0%   0.8067n ± 0%   -0.28% (p=0.007 n=10)
MulconstI32/-120     0.8109n ± 0%   0.8072n ± 0%   -0.47% (p=0.000 n=10)
MulconstI32/65537    0.8004n ± 0%   0.8004n ± 0%        ~ (p=1.000 n=10)
MulconstI32/65538    0.8005n ± 0%   0.8005n ± 0%        ~ (p=0.265 n=10)
MulconstI64/3        0.8005n ± 0%   0.4241n ± 1%  -47.02% (p=0.000 n=10)
MulconstI64/5        0.8004n ± 0%   0.4249n ± 1%  -46.91% (p=0.000 n=10)
MulconstI64/12       1.2010n ± 0%   0.8004n ± 0%  -33.36% (p=0.000 n=10)
MulconstI64/120      0.8005n ± 0%   0.8005n ± 0%        ~ (p=0.635 n=10)
MulconstI64/-120     0.8005n ± 0%   0.8005n ± 0%        ~ (p=0.837 n=10)
MulconstI64/65537    0.8005n ± 0%   0.8005n ± 0%        ~ (p=0.837 n=10)
MulconstI64/65538    0.8096n ± 0%   0.8004n ± 0%   -1.14% (p=0.000 n=10)
MulconstU32/3        0.8004n ± 0%   0.4263n ± 1%  -46.75% (p=0.000 n=10)
MulconstU32/5        0.8005n ± 0%   0.4262n ± 1%  -46.76% (p=0.000 n=10)
MulconstU32/12       1.2010n ± 0%   0.8005n ± 0%  -33.35% (p=0.000 n=10)
MulconstU32/120      0.8105n ± 0%   0.8096n ± 0%        ~ (p=0.183 n=10)
MulconstU32/65537    0.8004n ± 0%   0.8004n ± 0%        ~ (p=1.000 n=10)
MulconstU32/65538    0.8005n ± 0%   0.8005n ± 0%        ~ (p=1.000 n=10)
MulconstU64/3        0.8004n ± 0%   0.4265n ± 4%  -46.71% (p=0.000 n=10)
MulconstU64/5        0.8004n ± 0%   0.4256n ± 0%  -46.82% (p=0.000 n=10)
MulconstU64/12       1.2010n ± 0%   0.8004n ± 0%  -33.36% (p=0.000 n=10)
MulconstU64/120      0.8005n ± 0%   0.8005n ± 0%        ~ (p=0.387 n=10)
MulconstU64/65537    0.8005n ± 0%   0.8005n ± 0%        ~ (p=0.265 n=10)
MulconstU64/65538    0.8080n ± 0%   0.8004n ± 0%   -0.93% (p=0.000 n=10)
geomean              0.8539n        0.6597n       -22.74%

Change-Id: Ie33e88985d7639f481bbba540bc917b9f185c357
Reviewed-on: https://go-review.googlesource.com/c/go/+/693855
Reviewed-by: Dmitri Shuralyov <[email protected]>
Reviewed-by: Keith Randall <[email protected]>
Reviewed-by: sophie zhao <[email protected]>
Reviewed-by: abner chenc <[email protected]>
LUCI-TryBot-Result: Go LUCI <[email protected]>
1 parent 1b263fc commit 90b7d7a

File tree

7 files changed

+91
-53
lines changed

7 files changed

+91
-53
lines changed

src/cmd/compile/internal/loong64/ssa.go

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1065,6 +1065,17 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
10651065
{Type: obj.TYPE_CONST, Offset: int64((v.AuxInt >> 0) & 0x1f)},
10661066
})
10671067

1068+
case ssa.OpLOONG64ADDshiftLLV:
1069+
// ADDshiftLLV Rarg0, Rarg1, $shift
1070+
// ALSLV $shift, Rarg1, Rarg0, Rtmp
1071+
p := s.Prog(v.Op.Asm())
1072+
p.From.Type = obj.TYPE_CONST
1073+
p.From.Offset = v.AuxInt
1074+
p.Reg = v.Args[1].Reg()
1075+
p.AddRestSourceReg(v.Args[0].Reg())
1076+
p.To.Type = obj.TYPE_REG
1077+
p.To.Reg = v.Reg()
1078+
10681079
case ssa.OpClobber, ssa.OpClobberReg:
10691080
// TODO: implement for clobberdead experiment. Nop is ok for now.
10701081
default:

src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -577,6 +577,8 @@ func init() {
577577
// is $hint and bit[41:5] is $n.
578578
{name: "PRELD", argLength: 2, aux: "Int64", reg: preldreg, asm: "PRELD", hasSideEffects: true},
579579
{name: "PRELDX", argLength: 2, aux: "Int64", reg: preldreg, asm: "PRELDX", hasSideEffects: true},
580+
581+
{name: "ADDshiftLLV", argLength: 2, aux: "Int64", reg: gp21, asm: "ALSLV"}, // arg0 + arg1<<auxInt, the value of auxInt should be in the range [1, 4].
580582
}
581583

582584
blocks := []blockData{

src/cmd/compile/internal/ssa/config.go

Lines changed: 11 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -566,7 +566,7 @@ func (c *Config) buildRecipes(arch string) {
566566
}
567567
case "loong64":
568568
// - multiply is 4 cycles.
569-
// - add/sub/shift are 1 cycle.
569+
// - add/sub/shift/alsl are 1 cycle.
570570
// On loong64, using a multiply also needs to load the constant into a register.
571571
// TODO: figure out a happy medium.
572572
mulCost = 45
@@ -601,6 +601,15 @@ func (c *Config) buildRecipes(arch string) {
601601
return m.Block.NewValue1I(m.Pos, OpLOONG64SLLVconst, m.Type, int64(i), x)
602602
})
603603
}
604+
605+
// ADDshiftLLV
606+
for i := 1; i < 5; i++ {
607+
c := 10
608+
r(1, 1<<i, c,
609+
func(m, x, y *Value) *Value {
610+
return m.Block.NewValue2I(m.Pos, OpLOONG64ADDshiftLLV, m.Type, int64(i), x, y)
611+
})
612+
}
604613
}
605614

606615
c.mulRecipes = map[int64]mulRecipe{}
@@ -718,7 +727,7 @@ func (c *Config) buildRecipes(arch string) {
718727
// Currently:
719728
// len(c.mulRecipes) == 5984 on arm64
720729
// 680 on amd64
721-
// 5984 on loong64
730+
// 9738 on loong64
722731
// This function takes ~2.5ms on arm64.
723732
//println(len(c.mulRecipes))
724733
}

src/cmd/compile/internal/ssa/opGen.go

Lines changed: 16 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/cmd/internal/obj/loong64/asm.go

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2743,8 +2743,8 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) {
27432743

27442744
case 64: // alsl rd, rj, rk, sa2
27452745
sa := p.From.Offset - 1
2746-
if sa > 3 {
2747-
c.ctxt.Diag("The shift amount is too large.")
2746+
if sa < 0 || sa > 3 {
2747+
c.ctxt.Diag("%v: shift amount out of range[1, 4].\n", p)
27482748
}
27492749
r := p.GetFrom3().Reg
27502750
o1 = OP_2IRRR(c.opirrr(p.As), uint32(sa), uint32(r), uint32(p.Reg), uint32(p.To.Reg))

test/codegen/arithmetic.go

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -257,7 +257,7 @@ func Mul_96(n int) int {
257257
// 386:`SHLL\t[$]5`,`LEAL\t\(.*\)\(.*\*2\),`,-`IMULL`
258258
// arm64:`LSL\t[$]5`,`ADD\sR[0-9]+<<1,\sR[0-9]+`,-`MUL`
259259
// arm:`SLL\t[$]5`,`ADD\sR[0-9]+<<1,\sR[0-9]+`,-`MUL`
260-
// loong64:"ADDVU","SLLV\t[$]5",-"MULV"
260+
// loong64:"SLLV\t[$]5","ALSLV\t[$]1,"
261261
// s390x:`SLD\t[$]5`,`SLD\t[$]6`,-`MULLD`
262262
return n * 96
263263
}
@@ -317,7 +317,7 @@ func MergeMuls5(a, n int) int {
317317
// Multiplications folded negation
318318

319319
func FoldNegMul(a int) int {
320-
// loong64:"MULV","MOVV\t[$]-11",-"SUBVU\tR[0-9], R0,"
320+
// loong64:"SUBVU","ALSLV\t[$]2","ALSLV\t[$]1"
321321
return (-a) * 11
322322
}
323323

0 commit comments

Comments
 (0)