
Commit bcaaf39

Simplify stack-to-stack moves (#159)
* Simplify parallel stack-to-stack moves

Most of the changes here are simply refactoring. Resolving stack-to-stack moves first fills in the scratch register for the parallel moves, if one is needed. So we can split that work into two phases, inlining the `stack_to_stack` function, which was not used anywhere else. That lets us establish the invariant that we only continue into the second phase if there is at least one stack-to-stack move.

Because `compute` consumes `self`, we know that the lazily allocated stack-to-stack scratch register has not been allocated yet at entry to the loop, but because the loop only runs if there are stack-to-stack moves, we know the scratch register will definitely be allocated. So we can remove the corresponding state from the `MoveAndScratchResolver` struct and keep it in locals with shorter names instead, eagerly initialized before the start of the loop.

The above changes should have no effect on the output. However, I then added state tracking for whether the scratch register or the save slot already contains the right value and therefore doesn't need to be the target of another move. This should remove the need for the redundant-move eliminator to clean up after parallel moves. My first approach assumed the correct value was in exactly one of these places, but both allocations can be correct, and we can save more copies by acknowledging that.

* Remove MoveAndScratchResolver::new

This struct now contains exactly the fields whose values are passed to `new`. I feel it's better API design in this case to use the named fields than to expect callers to keep track of the positional arguments to the `new` method.

* Clarify relationship between save_slot/scratch_dirty

* Review comments: factor out is_stack_to_stack_move

* Remove "victim" terminology from public API

Also update more comments I missed.
1 parent 17d621b commit bcaaf39
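
To make the new bookkeeping concrete, here is a minimal, self-contained sketch of the dirty-flag scheme the commit message describes. It is illustrative only: `Loc`, `expand`, and the bare (src, dst) move pairs are hypothetical stand-ins rather than regalloc2's `Allocation`/`MoveVec` types, the per-move data payload is omitted, and it assumes no free register was available, so the scratch register is always borrowed and backed by a save slot (the real `compute` skips the save/restore entirely when `find_free_reg` returns a free register).

    // Hypothetical location type standing in for regalloc2's `Allocation`.
    #[derive(Clone, Copy, PartialEq, Debug)]
    enum Loc {
        Stack(u32),
        Reg(u8),
    }

    /// Expand stack-to-stack moves through a borrowed scratch register,
    /// emitting save/restore moves only when the save slot or the scratch
    /// register does not already hold the value it needs.
    fn expand(moves: &[(Loc, Loc)], scratch: Loc, save_slot: Loc) -> Vec<(Loc, Loc)> {
        let is_stack = |l: Loc| matches!(l, Loc::Stack(_));
        let mut out = Vec::new();
        // Initially the scratch register holds a live value we must preserve
        // and the save slot holds garbage.
        let mut scratch_dirty = false;
        let mut save_dirty = true;
        for &(src, dst) in moves {
            if is_stack(src) && is_stack(dst) {
                // Save the borrowed register before its first use as a temporary.
                if save_dirty {
                    out.push((scratch, save_slot));
                    save_dirty = false;
                }
                // Stack-to-stack moves must go through a register.
                out.push((src, scratch));
                out.push((scratch, dst));
                scratch_dirty = true;
            } else {
                if src == scratch && scratch_dirty {
                    // The move reads the borrowed register: restore its real value first.
                    out.push((save_slot, scratch));
                    scratch_dirty = false;
                }
                if dst == scratch {
                    // The move overwrites the borrowed register: its old value no
                    // longer matters, but it must be saved again before reuse.
                    scratch_dirty = false;
                    save_dirty = true;
                }
                out.push((src, dst));
            }
        }
        // Restore the borrowed register once, after the last stack-to-stack move.
        if scratch_dirty {
            out.push((save_slot, scratch));
        }
        out
    }

With this tracking, two consecutive stack-to-stack moves cost one save before the first and one restore after the last, whereas the old expansion wrapped every stack-to-stack move in its own save/restore pair and left the cleanup to the redundant-move eliminator.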

3 files changed, +131 -104 lines

fuzz/fuzz_targets/moves.rs

Lines changed: 7 additions & 3 deletions

@@ -83,16 +83,20 @@ fuzz_target!(|testcase: TestCase| {
     // Resolve uses of scratch reg and stack-to-stack moves with the
     // scratch resolver.
     let mut avail = testcase.available_pregs.clone();
-    let get_reg = || avail.pop();
+    let find_free_reg = || avail.pop();
     let mut next_slot = 32;
     let get_stackslot = || {
         let slot = next_slot;
         next_slot += 1;
         Allocation::stack(SpillSlot::new(slot))
     };
     let preferred_victim = PReg::new(0, RegClass::Int);
-    let scratch_resolver =
-        MoveAndScratchResolver::new(get_reg, get_stackslot, is_stack_alloc, preferred_victim);
+    let scratch_resolver = MoveAndScratchResolver {
+        find_free_reg,
+        get_stackslot,
+        is_stack_alloc,
+        borrowed_scratch_reg: preferred_victim,
+    };
     let moves = scratch_resolver.compute(moves);
     log::trace!("resolved moves: {:?}", moves);
src/ion/moves.rs

Lines changed: 5 additions & 5 deletions

@@ -909,7 +909,7 @@ impl<'a, F: Function> Env<'a, F> {
                 from: pos_prio.pos,
                 to: pos_prio.pos.next(),
             });
-            let get_reg = || {
+            let find_free_reg = || {
                 // Use the dedicated scratch register first if it is
                 // available.
                 if let Some(reg) = dedicated_scratch.take() {
@@ -958,12 +958,12 @@ impl<'a, F: Function> Env<'a, F> {
             };
             let preferred_victim = self.preferred_victim_by_class[regclass as usize];

-            let scratch_resolver = MoveAndScratchResolver::new(
-                get_reg,
+            let scratch_resolver = MoveAndScratchResolver {
+                find_free_reg,
                 get_stackslot,
                 is_stack_alloc,
-                preferred_victim,
-            );
+                borrowed_scratch_reg: preferred_victim,
+            };

             let resolved = scratch_resolver.compute(resolved);

src/moves.rs

Lines changed: 119 additions & 96 deletions

@@ -277,15 +277,6 @@ impl<T> MoveVecWithScratch<T> {
             MoveVecWithScratch::Scratch(..) => true,
         }
     }
-
-    /// Do any moves go from stack to stack?
-    pub fn stack_to_stack(&self, is_stack_alloc: impl Fn(Allocation) -> bool) -> bool {
-        match self {
-            MoveVecWithScratch::NoScratch(moves) | MoveVecWithScratch::Scratch(moves) => moves
-                .iter()
-                .any(|&(src, dst, _)| is_stack_alloc(src) && is_stack_alloc(dst)),
-        }
-    }
 }

 /// Final stage of move resolution: finding or using scratch
@@ -306,37 +297,28 @@ impl<T> MoveVecWithScratch<T> {
 ///
 /// Then, we resolve stack-to-stack moves into stack-to-reg /
 /// reg-to-stack pairs. For this, we try to allocate a second free
-/// register. If unavailable, we create another scratch stackslot, and
-/// we pick a "victim" register in the appropriate class, and we
-/// resolve into: victim -> extra-stackslot; stack-src -> victim;
-/// victim -> stack-dst; extra-stackslot -> victim.
-///
-/// Sometimes move elision will be able to clean this up a bit. But,
-/// for simplicity reasons, let's keep the concerns separated! So we
-/// always do the full expansion above.
+/// register. If unavailable, we create a new scratch stackslot to
+/// serve as a backup of one of the in-use registers, then borrow that
+/// register as the scratch register in the middle of stack-to-stack
+/// moves.
 pub struct MoveAndScratchResolver<GetReg, GetStackSlot, IsStackAlloc>
 where
     GetReg: FnMut() -> Option<Allocation>,
     GetStackSlot: FnMut() -> Allocation,
     IsStackAlloc: Fn(Allocation) -> bool,
 {
-    /// Scratch register for stack-to-stack move expansion.
-    stack_stack_scratch_reg: Option<Allocation>,
-    /// Stackslot into which we need to save the stack-to-stack
-    /// scratch reg before doing any stack-to-stack moves, if we stole
-    /// the reg.
-    stack_stack_scratch_reg_save: Option<Allocation>,
     /// Closure that finds us a PReg at the current location.
-    find_free_reg: GetReg,
+    pub find_free_reg: GetReg,
     /// Closure that gets us a stackslot, if needed.
-    get_stackslot: GetStackSlot,
+    pub get_stackslot: GetStackSlot,
     /// Closure to determine whether an `Allocation` refers to a stack slot.
-    is_stack_alloc: IsStackAlloc,
-    /// The victim PReg to evict to another stackslot at every
-    /// stack-to-stack move if a free PReg is not otherwise
-    /// available. Provided by caller and statically chosen. This is a
-    /// very last-ditch option, so static choice is OK.
-    victim: PReg,
+    pub is_stack_alloc: IsStackAlloc,
+    /// Use this register if no free register is available to use as a
+    /// temporary in stack-to-stack moves. If we do use this register
+    /// for that purpose, its value will be restored by the end of the
+    /// move sequence. Provided by caller and statically chosen. This is
+    /// a very last-ditch option, so static choice is OK.
+    pub borrowed_scratch_reg: PReg,
 }

 impl<GetReg, GetStackSlot, IsStackAlloc> MoveAndScratchResolver<GetReg, GetStackSlot, IsStackAlloc>
@@ -345,88 +327,129 @@ where
     GetStackSlot: FnMut() -> Allocation,
     IsStackAlloc: Fn(Allocation) -> bool,
 {
-    pub fn new(
-        find_free_reg: GetReg,
-        get_stackslot: GetStackSlot,
-        is_stack_alloc: IsStackAlloc,
-        victim: PReg,
-    ) -> Self {
-        Self {
-            stack_stack_scratch_reg: None,
-            stack_stack_scratch_reg_save: None,
-            find_free_reg,
-            get_stackslot,
-            is_stack_alloc,
-            victim,
-        }
-    }
+    pub fn compute<T: Debug + Default + Copy>(
+        mut self,
+        moves: MoveVecWithScratch<T>,
+    ) -> MoveVec<T> {
+        let moves = if moves.needs_scratch() {
+            // Now, find a scratch allocation in order to resolve cycles.
+            let scratch = (self.find_free_reg)().unwrap_or_else(|| (self.get_stackslot)());
+            trace!("scratch resolver: scratch alloc {:?}", scratch);
+
+            moves.with_scratch(scratch)
+        } else {
+            moves.without_scratch().unwrap()
+        };

-    pub fn compute<T: Debug + Copy>(mut self, moves: MoveVecWithScratch<T>) -> MoveVec<T> {
-        // First, do we have a vec with no stack-to-stack moves or use
-        // of a scratch register? Fast return if so.
-        if !moves.needs_scratch() && !moves.stack_to_stack(&self.is_stack_alloc) {
-            return moves.without_scratch().unwrap();
+        // Do we have any stack-to-stack moves? Fast return if not.
+        let stack_to_stack = moves
+            .iter()
+            .any(|&(src, dst, _)| self.is_stack_to_stack_move(src, dst));
+        if !stack_to_stack {
+            return moves;
         }

-        let mut result = smallvec![];
-
-        // Now, find a scratch allocation in order to resolve cycles.
-        let scratch = (self.find_free_reg)().unwrap_or_else(|| (self.get_stackslot)());
-        trace!("scratch resolver: scratch alloc {:?}", scratch);
+        // Allocate a scratch register for stack-to-stack move expansion.
+        let (scratch_reg, save_slot) = if let Some(reg) = (self.find_free_reg)() {
+            trace!(
+                "scratch resolver: have free stack-to-stack scratch preg: {:?}",
+                reg
+            );
+            (reg, None)
+        } else {
+            let reg = Allocation::reg(self.borrowed_scratch_reg);
+            // Stackslot into which we need to save the stack-to-stack
+            // scratch reg before doing any stack-to-stack moves, if we stole
+            // the reg.
+            let save = (self.get_stackslot)();
+            trace!(
+                "scratch resolver: stack-to-stack borrowing {:?} with save stackslot {:?}",
+                reg,
+                save
+            );
+            (reg, Some(save))
+        };
+
+        // Mutually exclusive flags for whether either scratch_reg or
+        // save_slot need to be restored from the other. Initially,
+        // scratch_reg has a value we should preserve and save_slot
+        // has garbage.
+        let mut scratch_dirty = false;
+        let mut save_dirty = true;

-        let moves = moves.with_scratch(scratch);
+        let mut result = smallvec![];
         for &(src, dst, data) in &moves {
             // Do we have a stack-to-stack move? If so, resolve.
-            if (self.is_stack_alloc)(src) && (self.is_stack_alloc)(dst) {
+            if self.is_stack_to_stack_move(src, dst) {
                 trace!("scratch resolver: stack to stack: {:?} -> {:?}", src, dst);
-                // Lazily allocate a stack-to-stack scratch.
-                if self.stack_stack_scratch_reg.is_none() {
-                    if let Some(reg) = (self.find_free_reg)() {
-                        trace!(
-                            "scratch resolver: have free stack-to-stack scratch preg: {:?}",
-                            reg
-                        );
-                        self.stack_stack_scratch_reg = Some(reg);
-                    } else {
-                        self.stack_stack_scratch_reg = Some(Allocation::reg(self.victim));
-                        self.stack_stack_scratch_reg_save = Some((self.get_stackslot)());
-                        trace!("scratch resolver: stack-to-stack using victim {:?} with save stackslot {:?}",
-                            self.stack_stack_scratch_reg,
-                            self.stack_stack_scratch_reg_save);
+
+                // If the selected scratch register is stolen from the
+                // set of in-use registers, then we need to save the
+                // current contents of the scratch register before using
+                // it as a temporary.
+                if let Some(save_slot) = save_slot {
+                    // However we may have already done so for an earlier
+                    // stack-to-stack move in which case we don't need
+                    // to do it again.
+                    if save_dirty {
+                        debug_assert!(!scratch_dirty);
+                        result.push((scratch_reg, save_slot, T::default()));
+                        save_dirty = false;
                     }
                 }

-                // If we have a "victimless scratch", then do a
-                // stack-to-scratch / scratch-to-stack sequence.
-                if self.stack_stack_scratch_reg_save.is_none() {
-                    result.push((src, self.stack_stack_scratch_reg.unwrap(), data));
-                    result.push((self.stack_stack_scratch_reg.unwrap(), dst, data));
+                // We can't move directly from one stack slot to another
+                // on any architecture we care about, so stack-to-stack
+                // moves must go via a scratch register.
+                result.push((src, scratch_reg, data));
+                result.push((scratch_reg, dst, data));
+                scratch_dirty = true;
+            } else {
+                // This is not a stack-to-stack move, but we need to
+                // make sure that the scratch register is in the correct
+                // state if this move interacts with that register.
+                if src == scratch_reg && scratch_dirty {
+                    // We're copying from the scratch register so if
+                    // it was stolen for a stack-to-stack move then we
+                    // need to make sure it has the correct contents,
+                    // not whatever was temporarily copied into it. If
+                    // we got scratch_reg from find_free_reg then it
+                    // had better not have been used as the source of
+                    // a move. So if we're here it's because we fell
+                    // back to the caller-provided last-resort scratch
+                    // register, and we must therefore have a save-slot
+                    // allocated too.
+                    debug_assert!(!save_dirty);
+                    let save_slot = save_slot.expect("move source should not be a free register");
+                    result.push((save_slot, scratch_reg, T::default()));
+                    scratch_dirty = false;
                 }
-                // Otherwise, save the current value in the
-                // stack-to-stack scratch reg (which is our victim) to
-                // the extra stackslot, then do the stack-to-scratch /
-                // scratch-to-stack sequence, then restore it.
-                else {
-                    result.push((
-                        self.stack_stack_scratch_reg.unwrap(),
-                        self.stack_stack_scratch_reg_save.unwrap(),
-                        data,
-                    ));
-                    result.push((src, self.stack_stack_scratch_reg.unwrap(), data));
-                    result.push((self.stack_stack_scratch_reg.unwrap(), dst, data));
-                    result.push((
-                        self.stack_stack_scratch_reg_save.unwrap(),
-                        self.stack_stack_scratch_reg.unwrap(),
-                        data,
-                    ));
+                if dst == scratch_reg {
+                    // We are writing something to the scratch register
+                    // so it doesn't matter what was there before. We
+                    // can avoid restoring it, but we will need to save
+                    // it again before the next stack-to-stack move.
+                    scratch_dirty = false;
+                    save_dirty = true;
                 }
-            } else {
-                // Normal move.
                 result.push((src, dst, data));
             }
         }

+        // Now that all the stack-to-stack moves are done, restore the
+        // scratch register if necessary.
+        if let Some(save_slot) = save_slot {
+            if scratch_dirty {
+                debug_assert!(!save_dirty);
+                result.push((save_slot, scratch_reg, T::default()));
+            }
+        }
+
         trace!("scratch resolver: got {:?}", result);
         result
     }
+
+    fn is_stack_to_stack_move(&self, src: Allocation, dst: Allocation) -> bool {
+        (self.is_stack_alloc)(src) && (self.is_stack_alloc)(dst)
+    }
 }
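
As a concrete illustration of the dirty-flag bookkeeping in `compute` above (the register and slot names are hypothetical, not output from the crate): suppose the parallel moves contain two stack-to-stack moves, no register is free, `borrowed_scratch_reg` is p0, and the save stackslot is slot9. The resolver emits:

    p0 -> slot9       save the borrowed register (save_dirty was true)
    stack0 -> p0
    p0 -> stack1      first stack-to-stack move, via p0
    stack2 -> p0      slot9 still holds p0's value, so no second save
    p0 -> stack3      second stack-to-stack move
    slot9 -> p0       restore p0 once, after the loop (scratch_dirty was true)

The old code would have emitted eight moves here, a save and a restore around each stack-to-stack move, and relied on later move elision to clean up some of the redundancy.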
