Fix tsort implementation: add stdin support, cycle detection, odd token validation, and comprehensive tests

Copilot · jgarzik · Copilot · commit 2852e44c169f · 2025-12-22T07:31:55.000Z
Co-authored-by: jgarzik <494411+jgarzik@users.noreply.github.com>
diff --git a/text/tests/text-tests.rs b/text/tests/text-tests.rs
@@ -24,6 +24,7 @@ mod sed;
 mod sort;
 mod tail;
 mod tr;
+mod tsort;
 mod unexpand;
 mod uniq;
 mod wc;
diff --git a/text/tests/tsort/mod.rs b/text/tests/tsort/mod.rs
@@ -0,0 +1,162 @@
+//
+// Copyright (c) 2024 Jeff Garzik
+// Copyright (c) 2024 Hemi Labs, Inc.
+//
+// This file is part of the posixutils-rs project covered under
+// the MIT License.  For the full license text, please see the LICENSE
+// file in the root directory of this project.
+// SPDX-License-Identifier: MIT
+//
+
+use plib::testing::{run_test, TestPlan};
+
+/// Runs `tsort` with `args`, feeding `test_data` on stdin, and hands the expected
+/// stdout, stderr and exit code to `run_test` via a `TestPlan`.
+fn tsort_test(
+    args: &[&str],
+    test_data: &str,
+    expected_output: &str,
+    expected_exit_code: i32,
+    expected_err: &str,
+) {
+    run_test(TestPlan {
+        cmd: "tsort".to_string(),
+        args: args.iter().map(|arg| arg.to_string()).collect(),
+        stdin_data: test_data.to_string(),
+        expected_out: expected_output.to_string(),
+        expected_err: expected_err.to_string(),
+        expected_exit_code,
+    });
+}
+
+#[test]
+fn test_basic() {
+    tsort_test(&[], "a b\nc d\nb c\n", "a\nb\nc\nd\n", 0, "");
+}
+
+#[test]
+fn test_simple_chain() {
+    tsort_test(&[], "a b\nb c\nc d\n", "a\nb\nc\nd\n", 0, "");
+}
+
+#[test]
+fn test_multiple_dependencies() {
+    tsort_test(&[], "a b\na c\nb c\n", "a\nb\nc\n", 0, ""); // a->b, a->c, b->c: one valid order
+}
+
+#[test]
+fn test_self_loop() {
+    tsort_test(&[], "a a\n", "a\n", 0, "");
+}
+
+#[test]
+fn test_empty_input() {
+    tsort_test(&[], "", "", 0, "");
+}
+
+#[test]
+fn test_single_pair() {
+    tsort_test(&[], "a b\n", "a\nb\n", 0, "");
+}
+
+#[test]
+fn test_whitespace_separated_chain() {
+    // Single line with chain dependencies: a->b->c->d
+    tsort_test(&[], "a b b c c d\n", "a\nb\nc\nd\n", 0, "");
+}
+
+#[test]
+fn test_multiline_tokens() {
+    // Chain dependencies across lines
+    tsort_test(&[], "a b\nb c\n", "a\nb\nc\n", 0, "");
+}
+
+#[test]
+fn test_odd_number_of_tokens() {
+    tsort_test(
+        &[],
+        "a b c\n",
+        "",
+        1,
+        "stdin: input contains an odd number of tokens\n",
+    );
+}
+
+#[test]
+fn test_simple_cycle() {
+    tsort_test(
+        &[],
+        "a b\nb a\n",
+        "a\nb\n",
+        1,
+        "stdin: input contains a loop:\nstdin: a\nstdin: b\n",
+    );
+}
+
+#[test]
+fn test_three_way_cycle() {
+    tsort_test(
+        &[],
+        "a b\nb c\nc a\n",
+        "a\nb\nc\n",
+        1,
+        "stdin: input contains a loop:\nstdin: a\nstdin: b\nstdin: c\n",
+    );
+}
+
+#[test]
+fn test_partial_cycle() {
+    // d->e has no cycle, a->b->c->a forms a cycle
+    tsort_test(
+        &[],
+        "a b\nb c\nc a\nd e\n",
+        "d\ne\na\nb\nc\n",
+        1,
+        "stdin: input contains a loop:\nstdin: a\nstdin: b\nstdin: c\n",
+    );
+}
+
+#[test]
+fn test_complex_graph_chain() {
+    // Clear chain: d->c->b->a
+    tsort_test(&[], "d c\nc b\nb a\n", "d\nc\nb\na\n", 0, "");
+}
+
+#[test]
+fn test_two_independent_items() {
+    // NOTE(review): despite the name, this is one dependent pair (same as test_single_pair)
+    tsort_test(&[], "a b\n", "a\nb\n", 0, "");
+}
+
+#[test]
+fn test_duplicate_pairs() {
+    // Same dependency specified multiple times
+    tsort_test(&[], "a b\na b\nb c\n", "a\nb\nc\n", 0, "");
+}
+
+#[test]
+fn test_long_string_tokens() {
+    tsort_test(
+        &[],
+        "very_long_token_name another_long_token\n",
+        "very_long_token_name\nanother_long_token\n",
+        0,
+        "",
+    );
+}
+
+#[test]
+fn test_numeric_tokens() {
+    tsort_test(&[], "1 2\n2 3\n3 4\n", "1\n2\n3\n4\n", 0, "");
+}
+
+#[test]
+fn test_mixed_tokens() {
+    tsort_test(
+        &[],
+        "file1.c file1.o\nfile1.o prog\n",
+        "file1.c\nfile1.o\nprog\n",
+        0,
+        "",
+    );
+}
diff --git a/text/tsort.rs b/text/tsort.rs
@@ -12,7 +12,7 @@ use std::path::PathBuf;
 
 use clap::Parser;
 use gettextrs::{bind_textdomain_codeset, gettext, setlocale, textdomain, LocaleCategory};
-use plib::io::input_stream_opt;
+use plib::io::input_stream;
 use topological_sort::TopologicalSort;
 
 /// tsort - topological sort
@@ -23,12 +23,17 @@ struct Args {
     file: Option<PathBuf>,
 }
 
-fn tsort_file(pathname: &Option<PathBuf>) -> io::Result<()> {
-    let file = input_stream_opt(pathname)?;
+fn tsort_file(pathname: &Option<PathBuf>) -> io::Result<i32> {
+    // Handle stdin with "-" or no argument
+    let file = match pathname {
+        Some(path) => input_stream(path, true)?,
+        None => input_stream(&PathBuf::new(), false)?,
+    };
     let mut reader = io::BufReader::new(file);
 
     let mut ts = TopologicalSort::<String>::new();
     let mut sv: Vec<String> = Vec::new();
+    let mut all_items: std::collections::HashSet<String> = std::collections::HashSet::new();
 
     loop {
         let mut buffer = String::new();
@@ -41,6 +46,9 @@ fn tsort_file(pathname: &Option<PathBuf>) -> io::Result<()> {
             sv.push(String::from(token));
 
             if sv.len() == 2 {
+                all_items.insert(sv[0].clone());
+                all_items.insert(sv[1].clone());
+
                 if sv[0] == sv[1] {
                     ts.insert(String::from(&sv[0]));
                 } else {
@@ -51,11 +59,56 @@ fn tsort_file(pathname: &Option<PathBuf>) -> io::Result<()> {
         }
     }
 
-    for s in ts {
+    // Check for odd number of tokens
+    if !sv.is_empty() {
+        eprintln!(
+            "{}: input contains an odd number of tokens",
+            pathname_display(pathname)
+        );
+        return Ok(1);
+    }
+
+    // Collect results and check for cycles
+    let mut sorted_items = Vec::new();
+    let mut sorted_set = std::collections::HashSet::new();
+
+    for s in &mut ts {
+        sorted_set.insert(s.clone());
+        sorted_items.push(s);
+    }
+
+    // If there are remaining items after iteration, there's a cycle
+    if ts.len() > 0 {
+        eprintln!("{}: input contains a loop:", pathname_display(pathname));
+
+        // Find items that were never emitted: cycle members and any items that depend on them
+        let mut cycle_items: Vec<String> = all_items.difference(&sorted_set).cloned().collect();
+        cycle_items.sort(); // For consistent output
+
+        // Print cycle items
+        for item in &cycle_items {
+            eprintln!("{}: {}", pathname_display(pathname), item);
+        }
+
+        // Print the sorted items first
+        for s in sorted_items {
+            println!("{}", s);
+        }
+
+        // Then print the cycle items
+        for item in &cycle_items {
+            println!("{}", item);
+        }
+
+        return Ok(1);
+    }
+
+    // Print results
+    for s in sorted_items {
         println!("{}", s);
     }
 
-    Ok(())
+    Ok(0)
 }
 
 fn pathname_display(path: &Option<PathBuf>) -> String {
@@ -72,12 +125,13 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
 
     let args = Args::parse();
 
-    let mut exit_code = 0;
-
-    if let Err(e) = tsort_file(&args.file) {
-        exit_code = 1;
-        eprintln!("{}: {}", pathname_display(&args.file), e);
-    }
+    let exit_code = match tsort_file(&args.file) {
+        Ok(code) => code,
+        Err(e) => {
+            eprintln!("{}: {}", pathname_display(&args.file), e);
+            1
+        }
+    };
 
     std::process::exit(exit_code)
 }