Create the side-by-side option (-y) feature for the diff command (Incomplete).

sami-daniel · sami-daniel · commit 463f5e2a5183 · 2025-04-22T13:11:06.000-03:00
- Create the function limited_string, in the utils module, which truncates a byte string to at most a
given number of bytes (may break the encoding of the character where it is cut)

- Create tests for limited_string function

- Add support for -y and --side-by-side flags that enables diff output for side-by-side mode

- Create an implementation of the diff -y (SideBySide) command, the base command for sdiff, using the crate
diff as the engine. Currently it does not fully reproduce GNU diff -y: some markers (|, (, ), \, /) could
not be implemented due to a limitation of the engine we currently use (crate diff), which does not
allow us to build that logic around it. Only the '<' and '>' markers are enabled.

- Create tests for SideBySide implementation
diff --git a/src/diff.rs b/src/diff.rs
@@ -5,7 +5,7 @@
 
 use crate::params::{parse_params, Format};
 use crate::utils::report_failure_to_read_input_file;
-use crate::{context_diff, ed_diff, normal_diff, unified_diff};
+use crate::{context_diff, ed_diff, normal_diff, unified_diff, side_diff};
 use std::env::ArgsOs;
 use std::ffi::OsString;
 use std::fs;
@@ -79,6 +79,7 @@ pub fn main(opts: Peekable<ArgsOs>) -> ExitCode {
             eprintln!("{error}");
             exit(2);
         }),
+        Format::SideBySide => side_diff::diff(&from_content, &to_content)
     };
     if params.brief && !result.is_empty() {
         println!(
diff --git a/src/lib.rs b/src/lib.rs
@@ -6,9 +6,11 @@ pub mod normal_diff;
 pub mod params;
 pub mod unified_diff;
 pub mod utils;
+pub mod side_diff;
 
 // Re-export the public functions/types you need
 pub use context_diff::diff as context_diff;
 pub use ed_diff::diff as ed_diff;
 pub use normal_diff::diff as normal_diff;
 pub use unified_diff::diff as unified_diff;
+pub use side_diff::diff as side_by_syde_diff;
diff --git a/src/main.rs b/src/main.rs
@@ -19,6 +19,7 @@ mod macros;
 mod normal_diff;
 mod params;
 mod unified_diff;
+mod side_diff;
 mod utils;
 
 /// # Panics
diff --git a/src/params.rs b/src/params.rs
@@ -11,6 +11,7 @@ pub enum Format {
     Unified,
     Context,
     Ed,
+    SideBySide
 }
 
 #[derive(Clone, Debug, Eq, PartialEq)]
@@ -101,6 +102,13 @@ pub fn parse_params<I: Iterator<Item = OsString>>(mut opts: Peekable<I>) -> Resu
             format = Some(Format::Ed);
             continue;
         }
+        if param == "-y" || param == "--side-by-side" {
+            if format.is_some() && format != Some(Format::SideBySide) {
+                return Err("Conflicting output style option".to_string());
+            }
+            format = Some(Format::SideBySide);
+            continue;
+        }
         if tabsize_re.is_match(param.to_string_lossy().as_ref()) {
             // Because param matches the regular expression,
             // it is safe to assume it is valid UTF-8.
diff --git a/src/side_diff.rs b/src/side_diff.rs
@@ -0,0 +1,81 @@
+use crate::utils::limited_string;
+use diff::Result;
+use std::{
+    io::{stdout, StdoutLock, Write},
+    vec,
+};
+
+fn push_output(
+    output: &mut StdoutLock,
+    left_ln: &[u8],
+    right_ln: &[u8],
+    symbol: &[u8],
+    tab_size: usize,
+) -> std::io::Result<()> {
+    // Writes one side-by-side row to `output` as raw bytes. This function
+    // exists because we cannot assume an encoding for the left or right line,
+    // and the writeln!() macro would oblige us to do so.
+
+    // side-by-side diff usually prints the output like:
+    // {left_line}{tab}{space_char}{symbol(|, < or >)}{space_char}{right_line}{EOL}
+
+    // recalculate how many spaces are necessary, because we need to take into
+    // consideration the length (in bytes) of the left line before printing it.
+    let tab_size = (tab_size as isize - left_ln.len() as isize).max(0);
+    let ident = vec![b' '; tab_size as usize];
+    output.write_all(left_ln)?; // {left_line}
+    output.write_all(&ident)?; // {tab}
+    output.write_all(b" ")?; // {space_char}
+    output.write_all(symbol)?; // {symbol}
+    output.write_all(b" ")?; // {space_char}
+    output.write_all(right_ln)?; // {right_line}
+    
+    writeln!(output)?; // {EOL}
+
+    Ok(())
+}
+
+pub fn diff(from_file: &Vec<u8>, to_file: &Vec<u8>) -> Vec<u8> {
+    //      ^ The left file      ^ The right file
+    
+    let mut output = stdout().lock();
+    let left_lines: Vec<&[u8]> = from_file.split(|&c| c == b'\n').collect();
+    let right_lines: Vec<&[u8]> = to_file.split(|&c| c == b'\n').collect();
+    let tab_size = 61; // padding width and per-line byte limit; for some reason it is 61, not 60
+    for result in diff::slice(&left_lines, &right_lines) {
+        match result {
+            Result::Left(left_ln) => {
+                push_output(
+                    &mut output,
+                    &limited_string(left_ln, tab_size),
+                    &[],
+                    b"<",
+                    tab_size,
+                )
+                .unwrap(); // panics if writing to stdout fails
+            }
+            Result::Right(right_ln) => {
+                push_output(
+                    &mut output,
+                    &[],
+                    &limited_string(right_ln, tab_size),
+                    b">",
+                    tab_size,
+                )
+                .unwrap(); // panics if writing to stdout fails
+            }
+            Result::Both(left_ln, right_ln) => {
+                push_output(
+                    &mut output,
+                    &limited_string(left_ln, tab_size),
+                    &limited_string(right_ln, tab_size),
+                    b" ",
+                    tab_size,
+                )
+                .unwrap(); // panics if writing to stdout fails
+            }
+        }
+    }
+
+    vec![] // every row was already written to stdout above, so the returned buffer is empty
+}
diff --git a/src/utils.rs b/src/utils.rs
@@ -3,9 +3,8 @@
 // For the full copyright and license information, please view the LICENSE-*
 // files that was distributed with this source code.
 
-use std::{ffi::OsString, io::Write};
-
 use regex::Regex;
+use std::{ffi::OsString, io::Write};
 use unicode_width::UnicodeWidthStr;
 
 /// Replace tabs by spaces in the input line.
@@ -99,6 +98,15 @@ pub fn report_failure_to_read_input_file(
     );
 }
 
+/// Truncates `orig` to at most `limiter` bytes. This can break the
+/// encoding of a multi-byte char if the cut falls inside it.
+#[must_use]
+pub fn limited_string<'a>(orig: &'a [u8], limiter: usize) -> &'a [u8] {
+    // TODO: Verify whether we broke the encoding of the char
+    // when we cut it.
+    &orig[..orig.len().min(limiter)]
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -205,4 +213,64 @@ mod tests {
             assert!(m_time > current_time);
         }
     }
+
+    mod limited_string {
+        use super::*;
+        use std::str;
+
+        #[test]
+        fn empty_orig_returns_empty() {
+            let orig: &[u8] = b"";
+            let result = limited_string(&orig, 10);
+            assert!(result.is_empty());
+        }
+
+        #[test]
+        fn zero_limit_returns_empty() {
+            let orig: &[u8] = b"foo";
+            let result = limited_string(&orig, 0);
+            assert!(result.is_empty());
+        }
+
+        #[test]
+        fn limit_longer_than_orig_returns_full() {
+            let orig: &[u8] = b"foo";
+            let result = limited_string(&orig, 10);
+            assert_eq!(result, orig);
+        }
+
+        #[test]
+        fn ascii_limit_in_middle() {
+            let orig: &[u8] = b"foobar";
+            let result = limited_string(&orig, 3);
+            assert_eq!(result, b"foo");
+            assert!(str::from_utf8(&result).is_ok()); // All are ASCII chars, so the encoding is not broken
+        }
+
+        #[test]
+        fn utf8_multibyte_cut_invalidates() {
+            let orig = "áéíóú".as_bytes(); 
+            let result = limited_string(&orig, 1);
+            // should contain only the first byte of the multi-byte char
+            assert_eq!(result, vec![0xC3]);
+            assert!(str::from_utf8(&result).is_err());
+        }
+
+        #[test]
+        fn utf8_limit_at_codepoint_boundary() {
+            let orig = "áéí".as_bytes();
+            let bytes = &orig;
+            let result = limited_string(&orig, bytes.len());
+
+            assert_eq!(result, *bytes);
+            assert!(str::from_utf8(&result).is_ok());
+        }
+
+        #[test]
+        fn works_with_byte_vec_input() {
+            let orig_bytes = b"hello".to_vec();
+            let result = limited_string(&orig_bytes, 3);
+            assert_eq!(result, b"hel");
+        }
+    }
 }