Skip to content

Commit 52bf322

Browse files
authored
Simplify parallel-moves state machine (#158)
* Simplify parallel-moves state machine Using a three-state enum instead of visited/onstack bools makes the state transitions easier to reason about. Also note that the behavior was the same for two cases ("there is no next move", and "the next move has already been emitted"), so I merged them. This commit also cuts memory usage in half for the must_come_before array and for the visited/onstack arrays. * Simplify cycle-handling in parallel moves The loop as previously written was guaranteed to run at least once and the first iteration was quite different than later iterations, so peel off the first iteration. That in turn made it clear that scratch_src is always set, which allowed simplifying emitting the final scratch-register move. One thing which was tricky is that if the existing implementation was handed a move of an allocation into itself, then it would previously emit a move from that allocation to the scratch register followed by a move from the scratch register back to the allocation. Peeling the first iteration out of the loop made it a little trickier to ensure that the loop didn't run at all in that case, but also emitting any moves is a silly result. So instead I've filtered out self-moves before starting the depth-first traversal. Ideally we'd do this before checking whether the sources overlap the destinations, since if the only overlaps are self-moves then we can just return the original move list. However it should happen after the debug assertion that there is at most one writer to each allocation, since the input is malformed if there's both a self-move and another move to the same allocation. After #157 lands this can be re-ordered. Because I'm filtering out self-moves and then running the depth-first traversal, if all of the moves were self-moves then we may try to traverse an empty graph. This was easily fixed by not blindly pushing indices onto the stack, and instead letting the existing check for un-visited moves fire because the stack is empty. * Use more idiomatic while-let loops
1 parent 69dba4a commit 52bf322

File tree

1 file changed

+48
-61
lines changed

1 file changed

+48
-61
lines changed

src/moves.rs

Lines changed: 48 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -116,70 +116,63 @@ impl<T: Clone + Copy + Default + PartialEq> ParallelMoves<T> {
116116
}
117117
}
118118

119+
// Moving an allocation into itself is technically a cycle but
120+
// should have no effect, as long as there are no other writes
121+
// into that destination.
122+
self.parallel_moves.retain(|&mut (src, dst, _)| src != dst);
123+
119124
// Construct a mapping from move indices to moves they must
120125
// come before. Any given move must come before a move that
121126
// overwrites its destination; we have moves sorted by dest
122127
// above so we can efficiently find such a move, if any.
123-
let mut must_come_before: SmallVec<[Option<usize>; 16]> =
124-
smallvec![None; self.parallel_moves.len()];
125-
for (i, &(src, _, _)) in self.parallel_moves.iter().enumerate() {
126-
if let Ok(move_to_dst_idx) = self
127-
.parallel_moves
128-
.binary_search_by_key(&src, |&(_, dst, _)| dst)
129-
{
130-
must_come_before[i] = Some(move_to_dst_idx);
131-
}
132-
}
128+
const NONE: usize = usize::MAX;
129+
let must_come_before: SmallVec<[usize; 16]> = self
130+
.parallel_moves
131+
.iter()
132+
.map(|&(src, _, _)| {
133+
self.parallel_moves
134+
.binary_search_by_key(&src, |&(_, dst, _)| dst)
135+
.unwrap_or(NONE)
136+
})
137+
.collect();
133138

134139
// Do a simple stack-based DFS and emit moves in postorder,
135140
// then reverse at the end for RPO. Unlike Tarjan's SCC
136141
// algorithm, we can emit a cycle as soon as we find one, as
137142
// noted above.
143+
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
144+
enum State {
145+
/// Not on stack, not visited
146+
ToDo,
147+
/// On stack, not yet visited
148+
Pending,
149+
/// Visited
150+
Done,
151+
}
138152
let mut ret: MoveVec<T> = smallvec![];
139153
let mut stack: SmallVec<[usize; 16]> = smallvec![];
140-
let mut visited: SmallVec<[bool; 16]> = smallvec![false; self.parallel_moves.len()];
141-
let mut onstack: SmallVec<[bool; 16]> = smallvec![false; self.parallel_moves.len()];
154+
let mut state: SmallVec<[State; 16]> = smallvec![State::ToDo; self.parallel_moves.len()];
142155
let mut scratch_used = false;
143156

144-
stack.push(0);
145-
onstack[0] = true;
146-
loop {
147-
if stack.is_empty() {
148-
if let Some(next) = visited.iter().position(|&flag| !flag) {
149-
stack.push(next);
150-
onstack[next] = true;
151-
} else {
152-
break;
153-
}
154-
}
157+
while let Some(next) = state.iter().position(|&state| state == State::ToDo) {
158+
stack.push(next);
159+
state[next] = State::Pending;
155160

156-
let top = *stack.last().unwrap();
157-
visited[top] = true;
158-
match must_come_before[top] {
159-
None => {
161+
while let Some(&top) = stack.last() {
162+
debug_assert_eq!(state[top], State::Pending);
163+
let next = must_come_before[top];
164+
if next == NONE || state[next] == State::Done {
160165
ret.push(self.parallel_moves[top]);
161-
onstack[top] = false;
166+
state[top] = State::Done;
162167
stack.pop();
163168
while let Some(top) = stack.pop() {
164169
ret.push(self.parallel_moves[top]);
165-
onstack[top] = false;
170+
state[top] = State::Done;
166171
}
167-
}
168-
Some(next) if visited[next] && !onstack[next] => {
169-
ret.push(self.parallel_moves[top]);
170-
onstack[top] = false;
171-
stack.pop();
172-
while let Some(top) = stack.pop() {
173-
ret.push(self.parallel_moves[top]);
174-
onstack[top] = false;
175-
}
176-
}
177-
Some(next) if !visited[next] && !onstack[next] => {
172+
} else if state[next] == State::ToDo {
178173
stack.push(next);
179-
onstack[next] = true;
180-
continue;
181-
}
182-
Some(next) => {
174+
state[next] = State::Pending;
175+
} else {
183176
// Found a cycle -- emit a cyclic-move sequence
184177
// for the cycle on the top of stack, then normal
185178
// moves below it. Recall that these moves will be
@@ -201,29 +194,23 @@ impl<T: Clone + Copy + Default + PartialEq> ParallelMoves<T> {
201194
// C := B
202195
// B := A
203196
// A := scratch
204-
let mut last_dst = None;
205-
let mut scratch_src = None;
206-
while let Some(move_idx) = stack.pop() {
207-
onstack[move_idx] = false;
208-
let (mut src, dst, dst_t) = self.parallel_moves[move_idx];
209-
if last_dst.is_none() {
210-
scratch_src = Some(src);
211-
src = Allocation::none();
212-
scratch_used = true;
213-
} else {
214-
debug_assert_eq!(last_dst.unwrap(), src);
215-
}
216-
ret.push((src, dst, dst_t));
197+
debug_assert_ne!(top, next);
198+
state[top] = State::Done;
199+
stack.pop();
217200

218-
last_dst = Some(dst);
201+
let (scratch_src, dst, dst_t) = self.parallel_moves[top];
202+
scratch_used = true;
203+
204+
ret.push((Allocation::none(), dst, dst_t));
205+
while let Some(move_idx) = stack.pop() {
206+
state[move_idx] = State::Done;
207+
ret.push(self.parallel_moves[move_idx]);
219208

220209
if move_idx == next {
221210
break;
222211
}
223212
}
224-
if let Some(src) = scratch_src {
225-
ret.push((src, Allocation::none(), T::default()));
226-
}
213+
ret.push((scratch_src, Allocation::none(), T::default()));
227214
}
228215
}
229216
}

0 commit comments

Comments
 (0)