Skip to content

Commit 10e7968

Browse files
JunyangShaocherrymui
authored andcommitted
cmd/compile: accounts rematerialize ops's output reginfo
This CL implements the check for rematerializeable value's output regspec at its remateralization site. It has some potential problems, please see the TODO in regalloc.go. Fixes golang#70451. Cherry-picked from the dev.simd branch. This CL is not necessarily SIMD specific. Apply early to reduce risk. Change-Id: Ib624b967031776851136554719e939e9bf116b7c Reviewed-on: https://go-review.googlesource.com/c/go/+/695315 Reviewed-by: David Chase <[email protected]> TryBot-Bypass: David Chase <[email protected]> Reviewed-on: https://go-review.googlesource.com/c/go/+/708857 Reviewed-by: Junyang Shao <[email protected]> LUCI-TryBot-Result: Go LUCI <[email protected]>
1 parent ab04395 commit 10e7968

File tree

4 files changed

+54
-0
lines changed

4 files changed

+54
-0
lines changed

src/cmd/compile/internal/ssa/func.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ func (c *Config) NewFunc(fe Frontend, cache *Cache) *Func {
102102
NamedValues: make(map[LocalSlot][]*Value),
103103
CanonicalLocalSlots: make(map[LocalSlot]*LocalSlot),
104104
CanonicalLocalSplits: make(map[LocalSlotSplitKey]*LocalSlot),
105+
OwnAux: &AuxCall{},
105106
}
106107
}
107108

src/cmd/compile/internal/ssa/func_test.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,11 @@ func Exit(arg string) ctrl {
250250
return ctrl{BlockExit, arg, []string{}}
251251
}
252252

253+
// Ret specifies a BlockRet.
254+
func Ret(arg string) ctrl {
255+
return ctrl{BlockRet, arg, []string{}}
256+
}
257+
253258
// Eq specifies a BlockAMD64EQ.
254259
func Eq(cond, sub, alt string) ctrl {
255260
return ctrl{BlockAMD64EQ, cond, []string{sub, alt}}

src/cmd/compile/internal/ssa/regalloc.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -609,6 +609,29 @@ func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos
609609
} else if v.rematerializeable() {
610610
// Rematerialize instead of loading from the spill location.
611611
c = v.copyIntoWithXPos(s.curBlock, pos)
612+
// We need to consider its output mask and potentially issue a Copy
613+
// if there are register mask conflicts.
614+
// This currently happens for the SIMD package only between GP and FP
615+
// register. Because Intel's vector extension can put integer value into
616+
// FP, which is seen as a vector. Example instruction: VPSLL[BWDQ]
617+
// Because GP and FP masks do not overlap, mask & outputMask == 0
618+
// detects this situation thoroughly.
619+
sourceMask := s.regspec(c).outputs[0].regs
620+
if mask&sourceMask == 0 && !onWasmStack {
621+
s.setOrig(c, v)
622+
s.assignReg(s.allocReg(sourceMask, v), v, c)
623+
// v.Type for the new OpCopy is likely wrong and it might delay the problem
624+
// until ssa to asm lowering, which might need the types to generate the right
625+
// assembly for OpCopy. For Intel's GP to FP move, it happens to be that
626+
// MOV instruction has such a variant so it happens to be right.
627+
// But it's unclear for other architectures or situations, and the problem
628+
// might be exposed when the assembler sees illegal instructions.
629+
// Right now make we still pick v.Type, because at least its size should be correct
630+
// for the rematerialization case the amd64 SIMD package exposed.
631+
// TODO: We might need to figure out a way to find the correct type or make
632+
// the asm lowering use reg info only for OpCopy.
633+
c = s.curBlock.NewValue1(pos, OpCopy, v.Type, c)
634+
}
612635
} else {
613636
// Load v from its spill location.
614637
spill := s.makeSpill(v, s.curBlock)

src/cmd/compile/internal/ssa/regalloc_test.go

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ package ssa
66

77
import (
88
"cmd/compile/internal/types"
9+
"cmd/internal/obj/x86"
910
"fmt"
1011
"testing"
1112
)
@@ -279,3 +280,27 @@ func numOps(b *Block, op Op) int {
279280
}
280281
return n
281282
}
283+
284+
func TestRematerializeableRegCompatible(t *testing.T) {
285+
c := testConfig(t)
286+
f := c.Fun("entry",
287+
Bloc("entry",
288+
Valu("mem", OpInitMem, types.TypeMem, 0, nil),
289+
Valu("x", OpAMD64MOVLconst, c.config.Types.Int32, 1, nil),
290+
Valu("a", OpAMD64POR, c.config.Types.Float32, 0, nil, "x", "x"),
291+
Valu("res", OpMakeResult, types.NewResults([]*types.Type{c.config.Types.Float32, types.TypeMem}), 0, nil, "a", "mem"),
292+
Ret("res"),
293+
),
294+
)
295+
regalloc(f.f)
296+
checkFunc(f.f)
297+
moveFound := false
298+
for _, v := range f.f.Blocks[0].Values {
299+
if v.Op == OpCopy && x86.REG_X0 <= v.Reg() && v.Reg() <= x86.REG_X31 {
300+
moveFound = true
301+
}
302+
}
303+
if !moveFound {
304+
t.Errorf("Expects an Copy to be issued, but got: %+v", f.f)
305+
}
306+
}

0 commit comments

Comments
 (0)