cmd/compile: accounts rematerialize ops's output reginfo

JunyangShao · cherrymui · commit 10e796884905 · 2025-10-03T12:30:11.000-07:00
This CL implements the check for rematerializeable value's output regspec at its remateralization site. It has some potential problems, please see the TODO in regalloc.go. Fixes golang#70451. Cherry-picked from the dev.simd branch. This CL is not necessarily SIMD specific. Apply early to reduce risk. Change-Id: Ib624b967031776851136554719e939e9bf116b7c Reviewed-on: https://go-review.googlesource.com/c/go/+/695315 Reviewed-by: David Chase <drchase@google.com> TryBot-Bypass: David Chase <drchase@google.com> Reviewed-on: https://go-review.googlesource.com/c/go/+/708857 Reviewed-by: Junyang Shao <shaojunyang@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
diff --git a/src/cmd/compile/internal/ssa/func.go b/src/cmd/compile/internal/ssa/func.go
@@ -102,6 +102,7 @@ func (c *Config) NewFunc(fe Frontend, cache *Cache) *Func {
 		NamedValues:          make(map[LocalSlot][]*Value),
 		CanonicalLocalSlots:  make(map[LocalSlot]*LocalSlot),
 		CanonicalLocalSplits: make(map[LocalSlotSplitKey]*LocalSlot),
+		OwnAux:               &AuxCall{},
 	}
 }
 
diff --git a/src/cmd/compile/internal/ssa/func_test.go b/src/cmd/compile/internal/ssa/func_test.go
@@ -250,6 +250,11 @@ func Exit(arg string) ctrl {
 	return ctrl{BlockExit, arg, []string{}}
 }
 
+// Ret specifies a BlockRet.
+func Ret(arg string) ctrl {
+	return ctrl{BlockRet, arg, []string{}}
+}
+
 // Eq specifies a BlockAMD64EQ.
 func Eq(cond, sub, alt string) ctrl {
 	return ctrl{BlockAMD64EQ, cond, []string{sub, alt}}
diff --git a/src/cmd/compile/internal/ssa/regalloc.go b/src/cmd/compile/internal/ssa/regalloc.go
@@ -609,6 +609,29 @@ func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos
 	} else if v.rematerializeable() {
 		// Rematerialize instead of loading from the spill location.
 		c = v.copyIntoWithXPos(s.curBlock, pos)
+		// We need to consider its output mask and potentially issue a Copy
+		// if there are register mask conflicts.
+		// This currently happens for the SIMD package only between GP and FP
+		// register. Because Intel's vector extension can put integer value into
+		// FP, which is seen as a vector. Example instruction: VPSLL[BWDQ]
+		// Because GP and FP masks do not overlap, mask & outputMask == 0
+		// detects this situation thoroughly.
+		sourceMask := s.regspec(c).outputs[0].regs
+		if mask&sourceMask == 0 && !onWasmStack {
+			s.setOrig(c, v)
+			s.assignReg(s.allocReg(sourceMask, v), v, c)
+			// v.Type for the new OpCopy is likely wrong and it might delay the problem
+			// until ssa to asm lowering, which might need the types to generate the right
+			// assembly for OpCopy. For Intel's GP to FP move, it happens to be that
+			// MOV instruction has such a variant so it happens to be right.
+			// But it's unclear for other architectures or situations, and the problem
+			// might be exposed when the assembler sees illegal instructions.
+			// Right now make we still pick v.Type, because at least its size should be correct
+			// for the rematerialization case the amd64 SIMD package exposed.
+			// TODO: We might need to figure out a way to find the correct type or make
+			// the asm lowering use reg info only for OpCopy.
+			c = s.curBlock.NewValue1(pos, OpCopy, v.Type, c)
+		}
 	} else {
 		// Load v from its spill location.
 		spill := s.makeSpill(v, s.curBlock)
diff --git a/src/cmd/compile/internal/ssa/regalloc_test.go b/src/cmd/compile/internal/ssa/regalloc_test.go
@@ -6,6 +6,7 @@ package ssa
 
 import (
 	"cmd/compile/internal/types"
+	"cmd/internal/obj/x86"
 	"fmt"
 	"testing"
 )
@@ -279,3 +280,27 @@ func numOps(b *Block, op Op) int {
 	}
 	return n
 }
+
+func TestRematerializeableRegCompatible(t *testing.T) {
+	c := testConfig(t)
+	f := c.Fun("entry",
+		Bloc("entry",
+			Valu("mem", OpInitMem, types.TypeMem, 0, nil),
+			Valu("x", OpAMD64MOVLconst, c.config.Types.Int32, 1, nil),
+			Valu("a", OpAMD64POR, c.config.Types.Float32, 0, nil, "x", "x"),
+			Valu("res", OpMakeResult, types.NewResults([]*types.Type{c.config.Types.Float32, types.TypeMem}), 0, nil, "a", "mem"),
+			Ret("res"),
+		),
+	)
+	regalloc(f.f)
+	checkFunc(f.f)
+	moveFound := false
+	for _, v := range f.f.Blocks[0].Values {
+		if v.Op == OpCopy && x86.REG_X0 <= v.Reg() && v.Reg() <= x86.REG_X31 {
+			moveFound = true
+		}
+	}
+	if !moveFound {
+		t.Errorf("Expects an Copy to be issued, but got: %+v", f.f)
+	}
+}

Original file line number	Diff line number	Diff line change
`@@ -102,6 +102,7 @@ func (c Config) NewFunc(fe Frontend, cache Cache) *Func {`
`102`	`102`	`NamedValues: make(map[LocalSlot][]*Value),`
`103`	`103`	`CanonicalLocalSlots: make(map[LocalSlot]*LocalSlot),`
`104`	`104`	`CanonicalLocalSplits: make(map[LocalSlotSplitKey]*LocalSlot),`
	`105`	`+ OwnAux: &AuxCall{},`
`105`	`106`	`}`
`106`	`107`	`}`
`107`	`108`