Skip to content

Commit af99952

Browse files
committed
tsort: print nodes and cycles as they are visited
Update `tsort` so that:

* nodes are printed as they are visited,
* cycles are printed as they are discovered,
* finding a cycle doesn't terminate the traversal,
* multiple cycles can be found and displayed.

Fixes #7074
1 parent 1bb33e0 commit af99952

File tree

2 files changed

+122
-30
lines changed

2 files changed

+122
-30
lines changed

src/uu/tsort/src/tsort.rs

Lines changed: 94 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
//
33
// For the full copyright and license information, please view the LICENSE
44
// file that was distributed with this source code.
5-
//spell-checker:ignore TAOCP
5+
//spell-checker:ignore TAOCP indegree
66
use clap::{crate_version, Arg, Command};
77
use std::collections::{HashMap, HashSet, VecDeque};
88
use std::fmt::Display;
@@ -75,28 +75,16 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
7575
};
7676

7777
// Create the directed graph from pairs of tokens in the input data.
78-
let mut g = Graph::default();
78+
let mut g = Graph::new(input.clone());
7979
for ab in data.split_whitespace().collect::<Vec<&str>>().chunks(2) {
8080
match ab {
8181
[a, b] => g.add_edge(a, b),
8282
_ => return Err(TsortError::NumTokensOdd(input.to_string()).into()),
8383
}
8484
}
8585

86-
match g.run_tsort() {
87-
Err(cycle) => {
88-
show!(TsortError::Loop(input.to_string()));
89-
for node in &cycle {
90-
show!(TsortError::LoopNode(node.to_string()));
91-
}
92-
println!("{}", cycle.join("\n"));
93-
Ok(())
94-
}
95-
Ok(ordering) => {
96-
println!("{}", ordering.join("\n"));
97-
Ok(())
98-
}
99-
}
86+
g.run_tsort();
87+
Ok(())
10088
}
10189
pub fn uu_app() -> Command {
10290
Command::new(uucore::util_name())
@@ -112,6 +100,20 @@ pub fn uu_app() -> Command {
112100
)
113101
}
114102

103+
/// Removes the first element equal to `x` from `vec`.
///
/// Returns the index the element occupied before it was removed, or `None`
/// (leaving `vec` untouched) when no element compares equal to `x`. Elements
/// after the removed one are shifted left, so their relative order is kept.
fn remove<T>(vec: &mut Vec<T>, x: T) -> Option<usize>
where
    T: PartialEq,
{
    // `position` stops at the first match; `?` returns `None` when there is none.
    let index = vec.iter().position(|item| *item == x)?;
    vec.remove(index);
    Some(index)
}
116+
115117
// We use String as a representation of node here
116118
// but using integer may improve performance.
117119
#[derive(Default)]
@@ -125,12 +127,20 @@ impl<'input> Node<'input> {
125127
self.successor_names.push(successor_name);
126128
}
127129
}
128-
#[derive(Default)]
130+
129131
struct Graph<'input> {
132+
name: String,
130133
nodes: HashMap<&'input str, Node<'input>>,
131134
}
132135

133136
impl<'input> Graph<'input> {
137+
fn new(name: String) -> Graph<'input> {
138+
Self {
139+
name,
140+
nodes: HashMap::default(),
141+
}
142+
}
143+
134144
fn add_node(&mut self, name: &'input str) {
135145
self.nodes.entry(name).or_default();
136146
}
@@ -147,9 +157,14 @@ impl<'input> Graph<'input> {
147157
to_node.predecessor_count += 1;
148158
}
149159
}
160+
161+
fn remove_edge(&mut self, u: &'input str, v: &'input str) {
162+
remove(&mut self.nodes.get_mut(u).unwrap().successor_names, v);
163+
self.nodes.get_mut(v).unwrap().predecessor_count -= 1;
164+
}
165+
150166
/// Implementation of algorithm T from TAOCP (Don. Knuth), vol. 1.
151-
fn run_tsort(&mut self) -> Result<Vec<&'input str>, Vec<&'input str>> {
152-
let mut result = Vec::with_capacity(self.nodes.len());
167+
fn run_tsort(&mut self) {
153168
// First, we find a node that have no prerequisites (independent nodes)
154169
// If no such node exists, then there is a cycle.
155170
let mut independent_nodes_queue: VecDeque<&'input str> = self
@@ -166,10 +181,18 @@ impl<'input> Graph<'input> {
166181
independent_nodes_queue.make_contiguous().sort_unstable(); // to make sure the resulting ordering is deterministic we need to order independent nodes
167182
// FIXME: this doesn't comply entirely with the GNU coreutils implementation.
168183

169-
// we remove each independent node, from the graph, updating each successor predecessor_count variable as we do.
170-
while let Some(name_of_next_node_to_process) = independent_nodes_queue.pop_front() {
171-
result.push(name_of_next_node_to_process);
172-
if let Some(node_to_process) = self.nodes.remove(name_of_next_node_to_process) {
184+
// To make sure the resulting ordering is deterministic we
185+
// need to order independent nodes.
186+
//
187+
// FIXME: this doesn't comply entirely with the GNU coreutils
188+
// implementation.
189+
independent_nodes_queue.make_contiguous().sort_unstable();
190+
191+
while !self.nodes.is_empty() {
192+
// Get the next node (breaking any cycles necessary to do so).
193+
let v = self.find_next_node(&mut independent_nodes_queue);
194+
println!("{v}");
195+
if let Some(node_to_process) = self.nodes.remove(v) {
173196
for successor_name in node_to_process.successor_names {
174197
let successor_node = self.nodes.get_mut(successor_name).unwrap();
175198
successor_node.predecessor_count -= 1;
@@ -180,20 +203,61 @@ impl<'input> Graph<'input> {
180203
}
181204
}
182205
}
206+
}
183207

184-
// if the graph has no cycle (it's a dependency tree), the graph should be empty now, as all nodes have been deleted.
185-
if self.nodes.is_empty() {
186-
Ok(result)
187-
} else {
188-
// otherwise, we detect and show a cycle to the user (as the GNU coreutils implementation does)
189-
Err(self.detect_cycle())
208+
/// Get the in-degree of the node with the given name.
209+
fn indegree(&self, name: &str) -> Option<usize> {
210+
self.nodes.get(name).map(|data| data.predecessor_count)
211+
}
212+
213+
// Pre-condition: self.nodes is non-empty.
214+
fn find_next_node(&mut self, frontier: &mut VecDeque<&'input str>) -> &'input str {
215+
// If there are no nodes of in-degree zero but there are still
216+
// un-visited nodes in the graph, then there must be a cycle.
217+
// We need to find the cycle, display it, and then break the
218+
// cycle.
219+
//
220+
// A cycle is guaranteed to be of length at least two. We break
221+
// the cycle by deleting an arbitrary edge (the first). That is
222+
// not necessarily the optimal thing, but it should be enough to
223+
// continue making progress in the graph traversal.
224+
//
225+
// It is possible that deleting the edge does not actually
226+
// result in the target node having in-degree zero, so we repeat
227+
// the process until such a node appears.
228+
loop {
229+
match frontier.pop_front() {
230+
None => self.find_and_break_cycle(frontier),
231+
Some(v) => return v,
232+
}
233+
}
234+
}
235+
236+
fn find_and_break_cycle(&mut self, frontier: &mut VecDeque<&'input str>) {
237+
let cycle = self.detect_cycle();
238+
show!(TsortError::Loop(self.name.clone()));
239+
for node in &cycle {
240+
show!(TsortError::LoopNode(node.to_string()));
241+
}
242+
let u = cycle[0];
243+
let v = cycle[1];
244+
self.remove_edge(u, v);
245+
if self.indegree(v).unwrap() == 0 {
246+
frontier.push_back(v);
190247
}
191248
}
192249

193250
fn detect_cycle(&self) -> Vec<&'input str> {
251+
// Sort the nodes just to make this function deterministic.
252+
let mut nodes = Vec::new();
253+
for node in self.nodes.keys() {
254+
nodes.push(node);
255+
}
256+
nodes.sort_unstable();
257+
194258
let mut visited = HashSet::new();
195259
let mut stack = Vec::with_capacity(self.nodes.len());
196-
for &node in self.nodes.keys() {
260+
for node in nodes {
197261
if !visited.contains(node) && self.dfs(node, &mut visited, &mut stack) {
198262
return stack;
199263
}

tests/by-util/test_tsort.rs

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,3 +83,31 @@ fn test_split_on_any_whitespace() {
8383
.succeeds()
8484
.stdout_only("a\nb\n");
8585
}
86+
87+
#[test]
fn test_cycle() {
    // The graph looks like: a --> b <==> c --> d
    //
    // The b/c loop is reported on stderr and the exit code is 1, yet all
    // four nodes are still written to stdout.
    let input = "a b b c c d c b";
    let expected_stdout = "a\nc\nd\nb\n";
    let expected_stderr = "tsort: -: input contains a loop:\ntsort: b\ntsort: c\n";

    new_ucmd!()
        .pipe_in(input)
        .fails()
        .code_is(1)
        .stdout_is(expected_stdout)
        .stderr_is(expected_stderr);
}
97+
98+
#[test]
fn test_two_cycles() {
    // The graph looks like:
    //
    //        a
    //        |
    //        V
    // c <==> b <==> d
    //
    // Both loops (b/c and b/d) are expected on stderr, one report each.
    let input = "a b b c c b b d d b";
    let expected_stdout = "a\nc\nd\nb\n";
    let expected_stderr = concat!(
        "tsort: -: input contains a loop:\ntsort: b\ntsort: c\n",
        "tsort: -: input contains a loop:\ntsort: b\ntsort: d\n",
    );

    new_ucmd!()
        .pipe_in(input)
        .fails()
        .code_is(1)
        .stdout_is(expected_stdout)
        .stderr_is(expected_stderr);
}

0 commit comments

Comments
 (0)