PullRequestInc
diff --git a/‎src/cmd/compile/internal/ssa/_gen/LOONG64.rules‎
Lines changed: 2 additions & 2 deletions b/‎src/cmd/compile/internal/ssa/_gen/LOONG64.rules‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎src/cmd/compile/internal/ssa/_gen/LOONG64latelower.rules‎
Lines changed: 6 additions & 0 deletions b/‎src/cmd/compile/internal/ssa/_gen/LOONG64latelower.rules‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎src/cmd/compile/internal/ssa/config.go‎
Lines changed: 83 additions & 3 deletions b/‎src/cmd/compile/internal/ssa/config.go‎
Lines changed: 83 additions & 3 deletions
diff --git a/‎src/cmd/compile/internal/ssa/rewriteLOONG64.go‎
Lines changed: 6 additions & 20 deletions b/‎src/cmd/compile/internal/ssa/rewriteLOONG64.go‎
Lines changed: 6 additions & 20 deletions
diff --git a/‎src/cmd/compile/internal/ssa/rewriteLOONG64latelower.go‎
Lines changed: 29 additions & 0 deletions b/‎src/cmd/compile/internal/ssa/rewriteLOONG64latelower.go‎
Lines changed: 29 additions & 0 deletions
diff --git a/‎test/codegen/arithmetic.go‎
Lines changed: 4 additions & 0 deletions b/‎test/codegen/arithmetic.go‎
Lines changed: 4 additions & 0 deletions
@@ -750,10 +750,10 @@
 (SRLVconst [rc] (MOVBUreg x)) && rc >= 8 => (MOVVconst [0])
 
 // mul by constant
-(MULV x (MOVVconst [-1])) => (NEGV x)
 (MULV _ (MOVVconst [0])) => (MOVVconst [0])
 (MULV x (MOVVconst [1])) => x
-(MULV x (MOVVconst [c])) && isPowerOfTwo(c) => (SLLVconst [log64(c)] x)
+
+(MULV  x (MOVVconst [c])) && canMulStrengthReduce(config, c) => {mulStrengthReduce(v, x, c)}
 
 // div by constant
 (DIVVU x (MOVVconst [1])) => x
 
@@ -0,0 +1,6 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Prefer addition when shifting left by one.
+(SLLVconst [1] x) => (ADDV x x)
@@ -283,6 +283,8 @@ func NewConfig(arch string, types Types, ctxt *obj.Link, optimize, softfloat boo
 		c.RegSize = 8
 		c.lowerBlock = rewriteBlockLOONG64
 		c.lowerValue = rewriteValueLOONG64
+		c.lateLowerBlock = rewriteBlockLOONG64latelower
+		c.lateLowerValue = rewriteValueLOONG64latelower
 		c.registers = registersLOONG64[:]
 		c.gpRegMask = gpRegMaskLOONG64
 		c.fpRegMask = fpRegMaskLOONG64
@@ -562,6 +564,43 @@ func (c *Config) buildRecipes(arch string) {
 					return m.Block.NewValue2I(m.Pos, OpARM64SUBshiftLL, m.Type, int64(i), x, y)
 				})
 		}
+	case "loong64":
+		// - multiply is 4 cycles.
+		// - add/sub/shift are 1 cycle.
+		// On loong64, using a multiply also needs to load the constant into a register.
+		// TODO: figure out a happy medium.
+		mulCost = 45
+
+		// add
+		r(1, 1, 10,
+			func(m, x, y *Value) *Value {
+				return m.Block.NewValue2(m.Pos, OpLOONG64ADDV, m.Type, x, y)
+			})
+		// neg
+		r(-1, 0, 10,
+			func(m, x, y *Value) *Value {
+				return m.Block.NewValue1(m.Pos, OpLOONG64NEGV, m.Type, x)
+			})
+		// sub
+		r(1, -1, 10,
+			func(m, x, y *Value) *Value {
+				return m.Block.NewValue2(m.Pos, OpLOONG64SUBV, m.Type, x, y)
+			})
+
+		// regular shifts
+		for i := 1; i < 64; i++ {
+			c := 10
+			if i == 1 {
+				// Prefer x<<1 over x+x.
+				// Note that we eventually reverse this decision in LOONG64latelower.rules,
+				// but this makes shift combining rules in LOONG64.rules simpler.
+				c--
+			}
+			r(1<<i, 0, c,
+				func(m, x, y *Value) *Value {
+					return m.Block.NewValue1I(m.Pos, OpLOONG64SLLVconst, m.Type, int64(i), x)
+				})
+		}
 	}
 
 	c.mulRecipes = map[int64]mulRecipe{}
@@ -628,17 +667,58 @@ func (c *Config) buildRecipes(arch string) {
 		}
 	}
 
+	// Currently, three-instruction linear combination recipes are only built for loong64.
+	if arch == "loong64" {
+		// Three-instruction recipes.
+		// D: The first and the second are both single-instruction recipes applied to the original value, and their results are the third's two inputs.
+		// E: The first's result is the second's first input and the second's result is the third's first input; the other input of each is the original value.
+
+		// D
+		for _, first := range linearCombos {
+			for _, second := range linearCombos {
+				for _, third := range linearCombos {
+					x := third.a*(first.a+first.b) + third.b*(second.a+second.b)
+					cost := first.cost + second.cost + third.cost
+					old := c.mulRecipes[x]
+					if (old.build == nil || cost < old.cost) && cost < mulCost {
+						c.mulRecipes[x] = mulRecipe{cost: cost, build: func(m, v *Value) *Value {
+							v1 := first.build(m, v, v)
+							v2 := second.build(m, v, v)
+							return third.build(m, v1, v2)
+						}}
+					}
+				}
+			}
+		}
+
+		// E
+		for _, first := range linearCombos {
+			for _, second := range linearCombos {
+				for _, third := range linearCombos {
+					x := third.a*(second.a*(first.a+first.b)+second.b) + third.b
+					cost := first.cost + second.cost + third.cost
+					old := c.mulRecipes[x]
+					if (old.build == nil || cost < old.cost) && cost < mulCost {
+						c.mulRecipes[x] = mulRecipe{cost: cost, build: func(m, v *Value) *Value {
+							v1 := first.build(m, v, v)
+							v2 := second.build(m, v1, v)
+							return third.build(m, v2, v)
+						}}
+					}
+				}
+			}
+		}
+	}
+
 	// These cases should be handled specially by rewrite rules.
 	// (Otherwise v * 1 == (neg (neg v)))
 	delete(c.mulRecipes, 0)
 	delete(c.mulRecipes, 1)
 
-	// Currently we assume that it doesn't help to do 3 linear
-	// combination instructions.
-
 	// Currently:
 	// len(c.mulRecipes) == 5984 on arm64
 	//                       680 on amd64
+	//                      5984 on loong64
 	// This function takes ~2.5ms on arm64.
 	//println(len(c.mulRecipes))
 }
@@ -228,13 +228,15 @@ func Pow2Muls(n1, n2 int) (int, int) {
 	// 386:"SHLL\t[$]5",-"IMULL"
 	// arm:"SLL\t[$]5",-"MUL"
 	// arm64:"LSL\t[$]5",-"MUL"
+	// loong64:"SLLV\t[$]5",-"MULV"
 	// ppc64x:"SLD\t[$]5",-"MUL"
 	a := n1 * 32
 
 	// amd64:"SHLQ\t[$]6",-"IMULQ"
 	// 386:"SHLL\t[$]6",-"IMULL"
 	// arm:"SLL\t[$]6",-"MUL"
 	// arm64:`NEG\sR[0-9]+<<6,\sR[0-9]+`,-`LSL`,-`MUL`
+	// loong64:"SLLV\t[$]6",-"MULV"
 	// ppc64x:"SLD\t[$]6","NEG\\sR[0-9]+,\\sR[0-9]+",-"MUL"
 	b := -64 * n2
 
@@ -255,11 +257,13 @@ func Mul_96(n int) int {
 	// 386:`SHLL\t[$]5`,`LEAL\t\(.*\)\(.*\*2\),`,-`IMULL`
 	// arm64:`LSL\t[$]5`,`ADD\sR[0-9]+<<1,\sR[0-9]+`,-`MUL`
 	// arm:`SLL\t[$]5`,`ADD\sR[0-9]+<<1,\sR[0-9]+`,-`MUL`
+	// loong64:"ADDVU","SLLV\t[$]5",-"MULV"
 	// s390x:`SLD\t[$]5`,`SLD\t[$]6`,-`MULLD`
 	return n * 96
 }
 
 func Mul_n120(n int) int {
+	// loong64:"SLLV\t[$]3","SLLV\t[$]7","SUBVU",-"MULV"
 	// s390x:`SLD\t[$]3`,`SLD\t[$]7`,-`MULLD`
 	return n * -120
 }