Skip to content

Commit 35b896f

Browse files
authored
Merge pull request #7093 from jfinkels/tsort-print-cycle
tsort: print nodes and cycles as they are visited
2 parents 5d6f51a + 6287924 commit 35b896f

File tree

2 files changed

+120
-39
lines changed

2 files changed

+120
-39
lines changed

src/uu/tsort/src/tsort.rs

Lines changed: 92 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
//
33
// For the full copyright and license information, please view the LICENSE
44
// file that was distributed with this source code.
5-
//spell-checker:ignore TAOCP
5+
//spell-checker:ignore TAOCP indegree
66
use clap::{crate_version, Arg, Command};
77
use std::collections::{HashMap, HashSet, VecDeque};
88
use std::fmt::Display;
@@ -75,28 +75,16 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
7575
};
7676

7777
// Create the directed graph from pairs of tokens in the input data.
78-
let mut g = Graph::default();
78+
let mut g = Graph::new(input.clone());
7979
for ab in data.split_whitespace().collect::<Vec<&str>>().chunks(2) {
8080
match ab {
8181
[a, b] => g.add_edge(a, b),
8282
_ => return Err(TsortError::NumTokensOdd(input.to_string()).into()),
8383
}
8484
}
8585

86-
match g.run_tsort() {
87-
Err(cycle) => {
88-
show!(TsortError::Loop(input.to_string()));
89-
for node in &cycle {
90-
show!(TsortError::LoopNode(node.to_string()));
91-
}
92-
println!("{}", cycle.join("\n"));
93-
Ok(())
94-
}
95-
Ok(ordering) => {
96-
println!("{}", ordering.join("\n"));
97-
Ok(())
98-
}
99-
}
86+
g.run_tsort();
87+
Ok(())
10088
}
10189
pub fn uu_app() -> Command {
10290
Command::new(uucore::util_name())
@@ -112,34 +100,45 @@ pub fn uu_app() -> Command {
112100
)
113101
}
114102

103+
/// Find the first element equal to `x` in `vec` and remove it.
///
/// Returns the index the element occupied before removal, or `None`
/// (leaving `vec` untouched) when no element compares equal to `x`.
/// Only the first match is removed; the elements after it shift left
/// by one position, so their relative order is preserved.
fn remove<T>(vec: &mut Vec<T>, x: T) -> Option<usize>
where
    T: PartialEq,
{
    // `?` bails out with `None` when `x` is absent. The removal is a
    // plain statement instead of a side effect tucked into
    // `Option::inspect`, which is intended for observing a value and
    // additionally requires Rust 1.76 or newer.
    let index = vec.iter().position(|item| *item == x)?;
    vec.remove(index);
    Some(index)
}
112+
115113
// We use strings to represent nodes here,
116114
// but using integers may improve performance.
117-
115+
/// Per-node bookkeeping for the graph.
///
/// The derived `Default` yields a node with no successors and a
/// predecessor count of zero.
#[derive(Default)]
struct Node<'input> {
    /// Names of the nodes this node has an edge to, in insertion order.
    successor_names: Vec<&'input str>,
    /// Number of incoming edges currently recorded for this node.
    predecessor_count: usize,
}
122120

123121
impl<'input> Node<'input> {
124-
fn new() -> Self {
125-
Node {
126-
successor_names: Vec::new(),
127-
predecessor_count: 0,
128-
}
129-
}
130-
131122
fn add_successor(&mut self, successor_name: &'input str) {
132123
self.successor_names.push(successor_name);
133124
}
134125
}
135-
#[derive(Default)]
126+
136127
struct Graph<'input> {
128+
name: String,
137129
nodes: HashMap<&'input str, Node<'input>>,
138130
}
139131

140132
impl<'input> Graph<'input> {
133+
fn new(name: String) -> Graph<'input> {
134+
Self {
135+
name,
136+
nodes: HashMap::default(),
137+
}
138+
}
139+
141140
fn add_node(&mut self, name: &'input str) {
142-
self.nodes.entry(name).or_insert_with(Node::new);
141+
self.nodes.entry(name).or_default();
143142
}
144143

145144
fn add_edge(&mut self, from: &'input str, to: &'input str) {
@@ -154,9 +153,14 @@ impl<'input> Graph<'input> {
154153
to_node.predecessor_count += 1;
155154
}
156155
}
156+
157+
fn remove_edge(&mut self, u: &'input str, v: &'input str) {
158+
remove(&mut self.nodes.get_mut(u).unwrap().successor_names, v);
159+
self.nodes.get_mut(v).unwrap().predecessor_count -= 1;
160+
}
161+
157162
/// Implementation of algorithm T from TAOCP (Don. Knuth), vol. 1.
158-
fn run_tsort(&mut self) -> Result<Vec<&'input str>, Vec<&'input str>> {
159-
let mut result = Vec::with_capacity(self.nodes.len());
163+
fn run_tsort(&mut self) {
160164
// First, we find the nodes that have no prerequisites (independent nodes).
161165
// If no such node exists, then there is a cycle.
162166
let mut independent_nodes_queue: VecDeque<&'input str> = self
@@ -173,10 +177,18 @@ impl<'input> Graph<'input> {
173177
independent_nodes_queue.make_contiguous().sort_unstable(); // to make sure the resulting ordering is deterministic we need to order independent nodes
174178
// FIXME: this doesn't comply entirely with the GNU coreutils implementation.
175179

176-
// we remove each independent node, from the graph, updating each successor predecessor_count variable as we do.
177-
while let Some(name_of_next_node_to_process) = independent_nodes_queue.pop_front() {
178-
result.push(name_of_next_node_to_process);
179-
if let Some(node_to_process) = self.nodes.remove(name_of_next_node_to_process) {
180+
// To make sure the resulting ordering is deterministic we
181+
// need to order independent nodes.
182+
//
183+
// FIXME: this doesn't comply entirely with the GNU coreutils
184+
// implementation.
185+
independent_nodes_queue.make_contiguous().sort_unstable();
186+
187+
while !self.nodes.is_empty() {
188+
// Get the next node (breaking any cycles necessary to do so).
189+
let v = self.find_next_node(&mut independent_nodes_queue);
190+
println!("{v}");
191+
if let Some(node_to_process) = self.nodes.remove(v) {
180192
for successor_name in node_to_process.successor_names {
181193
let successor_node = self.nodes.get_mut(successor_name).unwrap();
182194
successor_node.predecessor_count -= 1;
@@ -187,20 +199,61 @@ impl<'input> Graph<'input> {
187199
}
188200
}
189201
}
202+
}
203+
204+
/// Get the in-degree of the node with the given name.
205+
fn indegree(&self, name: &str) -> Option<usize> {
206+
self.nodes.get(name).map(|data| data.predecessor_count)
207+
}
190208

191-
// if the graph has no cycle (it's a dependency tree), the graph should be empty now, as all nodes have been deleted.
192-
if self.nodes.is_empty() {
193-
Ok(result)
194-
} else {
195-
// otherwise, we detect and show a cycle to the user (as the GNU coreutils implementation does)
196-
Err(self.detect_cycle())
209+
// Pre-condition: self.nodes is non-empty.
210+
fn find_next_node(&mut self, frontier: &mut VecDeque<&'input str>) -> &'input str {
211+
// If there are no nodes of in-degree zero but there are still
212+
// un-visited nodes in the graph, then there must be a cycle.
213+
// We need to find the cycle, display it, and then break the
214+
// cycle.
215+
//
216+
// A cycle is guaranteed to be of length at least two. We break
217+
// the cycle by deleting an arbitrary edge (the first). That is
218+
// not necessarily the optimal thing, but it should be enough to
219+
// continue making progress in the graph traversal.
220+
//
221+
// It is possible that deleting the edge does not actually
222+
// result in the target node having in-degree zero, so we repeat
223+
// the process until such a node appears.
224+
loop {
225+
match frontier.pop_front() {
226+
None => self.find_and_break_cycle(frontier),
227+
Some(v) => return v,
228+
}
229+
}
230+
}
231+
232+
fn find_and_break_cycle(&mut self, frontier: &mut VecDeque<&'input str>) {
233+
let cycle = self.detect_cycle();
234+
show!(TsortError::Loop(self.name.clone()));
235+
for node in &cycle {
236+
show!(TsortError::LoopNode(node.to_string()));
237+
}
238+
let u = cycle[0];
239+
let v = cycle[1];
240+
self.remove_edge(u, v);
241+
if self.indegree(v).unwrap() == 0 {
242+
frontier.push_back(v);
197243
}
198244
}
199245

200246
fn detect_cycle(&self) -> Vec<&'input str> {
247+
// Sort the nodes just to make this function deterministic.
248+
let mut nodes = Vec::new();
249+
for node in self.nodes.keys() {
250+
nodes.push(node);
251+
}
252+
nodes.sort_unstable();
253+
201254
let mut visited = HashSet::new();
202255
let mut stack = Vec::with_capacity(self.nodes.len());
203-
for &node in self.nodes.keys() {
256+
for node in nodes {
204257
if !visited.contains(node) && self.dfs(node, &mut visited, &mut stack) {
205258
return stack;
206259
}

tests/by-util/test_tsort.rs

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,3 +83,31 @@ fn test_split_on_any_whitespace() {
8383
.succeeds()
8484
.stdout_only("a\nb\n");
8585
}
86+
87+
/// A single two-node loop: the run fails with exit code 1, yet every
/// node is still printed and the loop members are listed on stderr.
#[test]
fn test_cycle() {
    // The graph looks like: a --> b <==> c --> d
    let input = "a b b c c d c b";
    let expected_stdout = "a\nc\nd\nb\n";
    let expected_stderr = "tsort: -: input contains a loop:\ntsort: b\ntsort: c\n";

    new_ucmd!()
        .pipe_in(input)
        .fails()
        .code_is(1)
        .stdout_is(expected_stdout)
        .stderr_is(expected_stderr);
}
97+
98+
/// Two loops sharing node `b`: each loop is reported separately on
/// stderr, and all nodes are still printed on stdout.
#[test]
fn test_two_cycles() {
    // The graph looks like:
    //
    //        a
    //        |
    //        V
    // c <==> b <==> d
    //
    let input = "a b b c c b b d d b";
    let expected_stdout = "a\nc\nd\nb\n";
    let expected_stderr = "tsort: -: input contains a loop:\ntsort: b\ntsort: c\ntsort: -: input contains a loop:\ntsort: b\ntsort: d\n";

    new_ucmd!()
        .pipe_in(input)
        .fails()
        .code_is(1)
        .stdout_is(expected_stdout)
        .stderr_is(expected_stderr);
}

0 commit comments

Comments
 (0)