@@ -56,10 +56,35 @@ func putStackAllocState(s *stackAllocState) {
 }
 
 type stackValState struct {
-	typ      *types.Type
-	spill    *Value
-	needSlot bool
-	isArg    bool
+	typ       *types.Type
+	spill     *Value
+	needSlot  bool
+	isArg     bool
+	defBlock  ID              // block in which this value is defined
+	useBlocks []stackUseBlock // blocks in which this value is used
+}
+
+// addUseBlock adds a block to the set of blocks that use this value.
+// Note that we only loosely enforce the set property: we check only the
+// last block appended to the list, so duplicates may occur.
+// Because uses are added block by block (barring phi nodes), the number of
+// duplicates is small and we deduplicate as part of the liveness algorithm later anyway.
+func (sv *stackValState) addUseBlock(b *Block, liveout bool) {
+	entry := stackUseBlock{
+		b:       b,
+		liveout: liveout,
+	}
+	if sv.useBlocks == nil || sv.useBlocks[len(sv.useBlocks)-1] != entry {
+		sv.useBlocks = append(sv.useBlocks, entry)
+	}
+}
+
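+// stackUseBlock records a single use of a value: the block b in which the
+// use occurs, and whether the value is known to be live out of b (true for
+// phi arguments and for spills reported live by regalloc).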
+type stackUseBlock struct {
+	b       *Block
+	liveout bool
 }
 
 // stackalloc allocates storage in the stack frame for
@@ -99,6 +124,7 @@ func (s *stackAllocState) init(f *Func, spillLive [][]ID) {
 			s.values[v.ID].typ = v.Type
 			s.values[v.ID].needSlot = !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && f.getHome(v.ID) == nil && !v.rematerializeable() && !v.OnWasmStack
 			s.values[v.ID].isArg = hasAnyArgOp(v)
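+			// Remember the defining block; computeLive stops its backwards
+			// walk when it reaches the block that defines the value.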
+			s.values[v.ID].defBlock = b.ID
 			if f.pass.debug > stackDebug && s.values[v.ID].needSlot {
 				fmt.Printf("%s needs a stack slot\n", v)
 			}
@@ -291,80 +317,89 @@ func (s *stackAllocState) stackalloc() {
 
 // computeLive computes a map from block ID to a list of
 // stack-slot-needing value IDs live at the end of that block.
-// TODO: this could be quadratic if lots of variables are live across lots of
-// basic blocks. Figure out a way to make this function (or, more precisely, the user
-// of this function) require only linear size & time.
 func (s *stackAllocState) computeLive(spillLive [][]ID) {
-	s.live = make([][]ID, s.f.NumBlocks())
-	var phis []*Value
-	live := s.f.newSparseSet(s.f.NumValues())
-	defer s.f.retSparseSet(live)
-	t := s.f.newSparseSet(s.f.NumValues())
-	defer s.f.retSparseSet(t)
-
-	// Instead of iterating over f.Blocks, iterate over their postordering.
-	// Liveness information flows backward, so starting at the end
-	// increases the probability that we will stabilize quickly.
-	po := s.f.postorder()
-	for {
-		changed := false
-		for _, b := range po {
-			// Start with known live values at the end of the block
-			live.clear()
-			live.addAll(s.live[b.ID])
-
-			// Propagate backwards to the start of the block
-			phis = phis[:0]
-			for i := len(b.Values) - 1; i >= 0; i-- {
-				v := b.Values[i]
-				live.remove(v.ID)
-				if v.Op == OpPhi {
-					// Save phi for later.
-					// Note: its args might need a stack slot even though
-					// the phi itself doesn't. So don't use needSlot.
-					if !v.Type.IsMemory() && !v.Type.IsVoid() {
-						phis = append(phis, v)
-					}
-					continue
-				}
-				for _, a := range v.Args {
-					if s.values[a.ID].needSlot {
-						live.add(a.ID)
-					}
-				}
-			}
 
-			// for each predecessor of b, expand its list of live-at-end values
-			// invariant: s contains the values live at the start of b (excluding phi inputs)
-			for i, e := range b.Preds {
-				p := e.b
-				t.clear()
-				t.addAll(s.live[p.ID])
-				t.addAll(live.contents())
-				t.addAll(spillLive[p.ID])
-				for _, v := range phis {
-					a := v.Args[i]
-					if s.values[a.ID].needSlot {
-						t.add(a.ID)
-					}
-					if spill := s.values[a.ID].spill; spill != nil {
+	// Because values using stack slots are few and far between
+	// (compared to the set of all values), we use a path-exploration
+	// algorithm to calculate liveness here.
+	f := s.f
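+	// First pass: for every slot-needing value, record the blocks in which
+	// it is used. Spill slots that regalloc reports live at the end of a
+	// block count as live-out uses of that block.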
+	for _, b := range f.Blocks {
+		for _, spillvid := range spillLive[b.ID] {
+			val := &s.values[spillvid]
+			val.addUseBlock(b, true)
+		}
+		for _, v := range b.Values {
+			for i, a := range v.Args {
+				val := &s.values[a.ID]
+				useBlock := b
+				forceLiveout := false
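+				// A phi's argument is not used in b itself but at the end
+				// of the corresponding predecessor, so record the use there
+				// and force the value live out of that block.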
+				if v.Op == OpPhi {
+					useBlock = b.Preds[i].b
+					forceLiveout = true
+					if spill := val.spill; spill != nil {
 						//TODO: remove? Subsumed by SpillUse?
-						t.add(spill.ID)
+						s.values[spill.ID].addUseBlock(useBlock, true)
 					}
 				}
-				if t.size() == len(s.live[p.ID]) {
+				if !val.needSlot {
 					continue
 				}
-				// grow p's live set
-				s.live[p.ID] = append(s.live[p.ID][:0], t.contents()...)
-				changed = true
+				val.addUseBlock(useBlock, forceLiveout)
 			}
 		}
+	}
 
-		if !changed {
-			break
+	s.live = make([][]ID, f.NumBlocks())
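+	// push records vid as live at the end of block bid. All pushes for a
+	// given value happen consecutively, so comparing against the last entry
+	// is enough to keep each block's list duplicate-free.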
+	push := func(bid, vid ID) {
+		l := s.live[bid]
+		if l == nil || l[len(l)-1] != vid {
+			l = append(l, vid)
+			s.live[bid] = l
 		}
 	}
+	// TODO: If we can help along the interference graph by calculating live-in sets,
+	// we can do so trivially by turning this sparse set into an array of arrays
+	// and checking the top for the current value instead of inclusion in the sparse set.
+	seen := f.newSparseSet(f.NumBlocks())
+	defer f.retSparseSet(seen)
+	// Instead of pruning out duplicate blocks when we build the useBlocks slices
+	// or when we add them to the queue, rely on the seen set to stop considering
+	// them. This is slightly faster than building the work queues as sets.
+	//
+	// However, this means that the queue can grow larger than the number of blocks,
+	// usually in very short functions. Returning a slice with values appended beyond the
+	// original allocation can corrupt the allocator state, so cap the queue and return
+	// the originally allocated slice regardless.
+	allocedBqueue := f.Cache.allocBlockSlice(f.NumBlocks())
+	defer f.Cache.freeBlockSlice(allocedBqueue)
+	bqueue := allocedBqueue[:0:f.NumBlocks()]
+
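+	// Second pass: for each slot-needing value, walk the CFG backwards from
+	// its use blocks. Every block the walk reaches (short of the defining
+	// block) has the value live in, so the value is live at the end of each
+	// of that block's predecessors.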
+	for vid, v := range s.values {
+		if !v.needSlot {
+			continue
+		}
+		seen.clear()
+		bqueue = bqueue[:0]
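+		// Seed the queue with the value's use blocks. A use already known
+		// to be live out contributes to that block's live set directly.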
+		for _, b := range v.useBlocks {
+			if b.liveout {
+				push(b.b.ID, ID(vid))
+			}
+			bqueue = append(bqueue, b.b)
+		}
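+		// Pop blocks off the queue and walk their predecessors, stopping
+		// at the defining block and at blocks we have already visited.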
+		for len(bqueue) > 0 {
+			work := bqueue[len(bqueue)-1]
+			bqueue = bqueue[:len(bqueue)-1]
+			if seen.contains(work.ID) || work.ID == v.defBlock {
+				continue
+			}
+			seen.add(work.ID)
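+			// The value is live in to work, hence live at the end of
+			// every one of work's predecessors.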
+			for _, e := range work.Preds {
+				push(e.b.ID, ID(vid))
+				bqueue = append(bqueue, e.b)
+			}
+		}
+	}
+
 	if s.f.pass.debug > stackDebug {
 		for _, b := range s.f.Blocks {
 			fmt.Printf("stacklive %s %v\n", b, s.live[b.ID])