Skip to content

Commit 592c2db

Browse files
amusman authored and gopherbot committed
cmd/compile: improve loopRotate to handle nested loops
Enhance loop rotation of nested loops. Currently, loops are processed independently, resulting in unnecessary jumps between outer and inner loops. By processing inner loops before their parent loop, we ensure nested loop blocks are properly placed within their parent loop's block sequence.

There is some code size improvement (as measured on amd64) because jumps to/from inner loops are removed by the updated loopRotate block order:

Executable    Old .text    New .text    Change
-------------------------------------------------------
asm           2147569      2146481      -0.05%
cgo           1977457      1975761      -0.09%
compile       10447345     10441905     -0.05%
cover         2110097      2108977      -0.05%
link          2930289      2929041      -0.04%
preprofile    927345       926769       -0.06%
vet           3279057      3277009      -0.06%

Change-Id: I4b9e993c2be07fad735e6bcf32d062d099d9cfb5
Reviewed-on: https://go-review.googlesource.com/c/go/+/684335
Reviewed-by: Keith Randall <[email protected]>
Auto-Submit: Keith Randall <[email protected]>
Reviewed-by: Michael Knyszek <[email protected]>
Reviewed-by: Keith Randall <[email protected]>
LUCI-TryBot-Result: Go LUCI <[email protected]>
1 parent dcb479c commit 592c2db

File tree

2 files changed

+142
-8
lines changed

2 files changed

+142
-8
lines changed

src/cmd/compile/internal/ssa/looprotate.go

Lines changed: 77 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@
44

55
package ssa
66

7+
import (
8+
"slices"
9+
)
10+
711
// loopRotate converts loops with a check-loop-condition-at-beginning
812
// to loops with a check-loop-condition-at-end.
913
// This helps loops avoid extra unnecessary jumps.
@@ -41,10 +45,65 @@ func loopRotate(f *Func) {
4145

4246
// Map from block ID to the moving blocks that should
4347
// come right after it.
48+
// If a block whose ID is a key of the 'after' map also occurs in
49+
// some other block's 'after' list, it represents a whole nested
50+
// loop. For example, consider an inner loop I nested in an outer
51+
// loop O. It and Ot are the corresponding top blocks for these loops
52+
// chosen by our algorithm, and It is in Ot's 'after' list.
53+
//
54+
// Before: After:
55+
//
56+
// e e
57+
// │ │
58+
// │ │Ot ◄───┐
59+
// ▼ ▼▼ │
60+
// ┌───Oh ◄────┐ ┌─┬─Oh │
61+
// │ │ │ │ │ │
62+
// │ │ │ │ │ It◄───┐ │
63+
// │ ▼ │ │ │ ▼ │ │
64+
// │ ┌─Ih◄───┐ │ │ └►Ih │ │
65+
// │ │ │ │ │ │ ┌─┤ │ │
66+
// │ │ ▼ │ │ │ │ ▼ │ │
67+
// │ │ Ib │ │ │ │ Ib │ │
68+
// │ │ └─►It─┘ │ │ │ └─────┘ │
69+
// │ │ │ │ │ │
70+
// │ └►Ie │ │ └►Ie │
71+
// │ └─►Ot───┘ │ └───────┘
72+
// │ │
73+
// └──►Oe └──►Oe
74+
//
75+
// We build the 'after' lists for each of the top blocks Ot and It:
76+
// after[Ot]: Oh, It, Ie
77+
// after[It]: Ih, Ib
4478
after := map[ID][]*Block{}
4579

80+
// Map from loop header ID to the new top block for the loop.
81+
tops := map[ID]*Block{}
82+
83+
// Order loops to rotate any child loop before adding its top block
84+
// to the parent loop's 'after' list.
85+
loopnest.calculateDepths()
86+
loopOrder := f.Cache.allocIntSlice(len(loopnest.loops))
87+
for i := range loopOrder {
88+
loopOrder[i] = i
89+
}
90+
defer f.Cache.freeIntSlice(loopOrder)
91+
slices.SortFunc(loopOrder, func(i, j int) int {
92+
di := loopnest.loops[i].depth
93+
dj := loopnest.loops[j].depth
94+
switch {
95+
case di > dj:
96+
return -1
97+
case di < dj:
98+
return 1
99+
default:
100+
return 0
101+
}
102+
})
103+
46104
// Check each loop header and decide if we want to move it.
47-
for _, loop := range loopnest.loops {
105+
for _, loopIdx := range loopOrder {
106+
loop := loopnest.loops[loopIdx]
48107
b := loop.header
49108
var p *Block // b's in-loop predecessor
50109
for _, e := range b.Preds {
@@ -59,6 +118,7 @@ func loopRotate(f *Func) {
59118
if p == nil {
60119
continue
61120
}
121+
tops[loop.header.ID] = p
62122
p.Hotness |= HotInitial
63123
if f.IsPgoHot {
64124
p.Hotness |= HotPgo
@@ -80,8 +140,10 @@ func loopRotate(f *Func) {
80140
if nextb == p { // original loop predecessor is next
81141
break
82142
}
83-
if loopnest.b2l[nextb.ID] == loop {
84-
after[p.ID] = append(after[p.ID], nextb)
143+
if bloop := loopnest.b2l[nextb.ID]; bloop != nil {
144+
if bloop == loop || bloop.outer == loop && tops[bloop.header.ID] == nextb {
145+
after[p.ID] = append(after[p.ID], nextb)
146+
}
85147
}
86148
b = nextb
87149
}
@@ -90,7 +152,7 @@ func loopRotate(f *Func) {
90152
f.Blocks[idToIdx[p.ID]] = loop.header
91153
idToIdx[loop.header.ID], idToIdx[p.ID] = idToIdx[p.ID], idToIdx[loop.header.ID]
92154

93-
// Place b after p.
155+
// Place loop blocks after p.
94156
for _, b := range after[p.ID] {
95157
move[b.ID] = struct{}{}
96158
}
@@ -107,16 +169,23 @@ func loopRotate(f *Func) {
107169
oldOrder := f.Cache.allocBlockSlice(len(f.Blocks))
108170
defer f.Cache.freeBlockSlice(oldOrder)
109171
copy(oldOrder, f.Blocks)
172+
var moveBlocks func(bs []*Block)
173+
moveBlocks = func(blocks []*Block) {
174+
for _, a := range blocks {
175+
f.Blocks[j] = a
176+
j++
177+
if nextBlocks, ok := after[a.ID]; ok {
178+
moveBlocks(nextBlocks)
179+
}
180+
}
181+
}
110182
for _, b := range oldOrder {
111183
if _, ok := move[b.ID]; ok {
112184
continue
113185
}
114186
f.Blocks[j] = b
115187
j++
116-
for _, a := range after[b.ID] {
117-
f.Blocks[j] = a
118-
j++
119-
}
188+
moveBlocks(after[b.ID])
120189
}
121190
if j != len(oldOrder) {
122191
f.Fatalf("bad reordering in looprotate")
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
// Copyright 2025 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
package ssa
6+
7+
import (
8+
"cmd/compile/internal/types"
9+
"testing"
10+
)
11+
12+
// TestLoopRotateNested builds a function containing an inner loop
// (innerHeader, innerBody, innerTop) nested inside an outer loop
// (outerHeader, outerBody, innerExit, outerTop), runs loopRotate on it,
// and checks the resulting order of f.Blocks against a fixed expectation.
func TestLoopRotateNested(t *testing.T) {
13+
c := testConfig(t)
14+
fun := c.Fun("entry",
15+
Bloc("entry",
16+
Valu("mem", OpInitMem, types.TypeMem, 0, nil),
17+
Valu("constTrue", OpConstBool, types.Types[types.TBOOL], 1, nil),
18+
Goto("outerHeader")),
19+
Bloc("outerHeader",
20+
If("constTrue", "outerBody", "outerExit")),
21+
Bloc("outerBody",
22+
Goto("innerHeader")),
23+
Bloc("innerHeader",
24+
If("constTrue", "innerBody", "innerExit")),
25+
Bloc("innerBody",
26+
Goto("innerTop")),
27+
Bloc("innerTop",
28+
Goto("innerHeader")),
29+
Bloc("innerExit",
30+
Goto("outerTop")),
31+
Bloc("outerTop",
32+
Goto("outerHeader")),
33+
Bloc("outerExit",
34+
Exit("mem")))
35+
36+
// Build a reverse lookup from block ID to block name so the final
// order can be compared and reported by name.
// NOTE(review): the +1 presumably leaves room for block IDs that
// start at 1 rather than 0 — confirm.
blockName := make([]string, len(fun.f.Blocks)+1)
37+
for name, block := range fun.blocks {
38+
blockName[block.ID] = name
39+
}
40+
41+
CheckFunc(fun.f)
42+
loopRotate(fun.f)
43+
CheckFunc(fun.f)
44+
45+
// Verify the resulting block order: each loop's top block comes
// directly before its header, and the inner loop's blocks form a
// contiguous run between outerBody and innerExit.
46+
expected := []string{
47+
"entry",
48+
"outerTop",
49+
"outerHeader",
50+
"outerBody",
51+
"innerTop",
52+
"innerHeader",
53+
"innerBody",
54+
"innerExit",
55+
"outerExit",
56+
}
57+
if len(expected) != len(fun.f.Blocks) {
58+
t.Fatalf("expected %d blocks, found %d", len(expected), len(fun.f.Blocks))
59+
}
60+
for i, b := range fun.f.Blocks {
61+
if expected[i] != blockName[b.ID] {
62+
t.Errorf("position %d: expected %s, found %s", i, expected[i], blockName[b.ID])
63+
}
64+
}
65+
}

0 commit comments

Comments
 (0)