|
3 | 3 | // For the full copyright and license information, please view the LICENSE |
4 | 4 | // file that was distributed with this source code. |
5 | 5 | // |
6 | | -// spell-checker:ignore binvalid finvalid hinvalid iinvalid linvalid nabcabc nabcabcabc ninvalid vinvalid winvalid dabc näää |
| 6 | +// spell-checker:ignore binvalid finvalid hinvalid iinvalid linvalid nabcabc nabcabcabc ninvalid vinvalid winvalid dabc näää févr |
7 | 7 | use uutests::{at_and_ucmd, new_ucmd, util::TestScenario, util_name}; |
8 | 8 |
|
9 | 9 | #[test] |
@@ -209,23 +209,24 @@ fn test_number_separator() { |
209 | 209 | #[test] |
210 | 210 | #[cfg(target_os = "linux")] |
211 | 211 | fn test_number_separator_non_utf8() { |
212 | | - use std::{ |
213 | | - ffi::{OsStr, OsString}, |
214 | | - os::unix::ffi::{OsStrExt, OsStringExt}, |
215 | | - }; |
| 212 | + use std::{ffi::OsString, os::unix::ffi::OsStringExt}; |
216 | 213 |
|
217 | 214 | let separator_bytes = [0xFF, 0xFE]; |
218 | 215 | let mut v = b"--number-separator=".to_vec(); |
219 | 216 | v.extend_from_slice(&separator_bytes); |
220 | 217 |
|
221 | 218 | let arg = OsString::from_vec(v); |
222 | | - let separator = OsStr::from_bytes(&separator_bytes); |
| 219 | + |
| 220 | + // Build the expected stdout as raw bytes: the separator bytes (0xFF 0xFE) must appear verbatim between the line number and the text, so the comparison is byte-for-byte rather than via a lossy string conversion |
| 221 | + let mut expected = b" 1".to_vec(); |
| 222 | + expected.extend_from_slice(&separator_bytes); |
| 223 | + expected.extend_from_slice(b"test\n"); |
223 | 224 |
|
224 | 225 | new_ucmd!() |
225 | 226 | .arg(arg) |
226 | 227 | .pipe_in("test") |
227 | 228 | .succeeds() |
228 | | - .stdout_is(format!(" 1{}test\n", separator.to_string_lossy())); |
| 229 | + .stdout_is_bytes(expected); |
229 | 230 | } |
230 | 231 |
|
231 | 232 | #[test] |
@@ -791,14 +792,24 @@ fn test_file_with_non_utf8_content() { |
791 | 792 |
|
792 | 793 | let filename = "file"; |
793 | 794 | let content: &[u8] = b"a\n\xFF\xFE\nb"; |
794 | | - let invalid_utf8: &[u8] = b"\xFF\xFE"; |
795 | 795 |
|
796 | 796 | at.write_bytes(filename, content); |
797 | 797 |
|
798 | | - ucmd.arg(filename).succeeds().stdout_is(format!( |
799 | | - " 1\ta\n 2\t{}\n 3\tb\n", |
800 | | - String::from_utf8_lossy(invalid_utf8) |
801 | | - )); |
| 798 | + // The raw bytes must appear unchanged in the output (not replaced with U+FFFD replacement characters) |
| 799 | + let expected: Vec<u8> = b" 1\ta\n 2\t\xFF\xFE\n 3\tb\n".to_vec(); |
| 800 | + ucmd.arg(filename).succeeds().stdout_is_bytes(expected); |
| 801 | +} |
| 802 | + |
| 803 | +#[test] |
| 804 | +fn test_stdin_non_utf8_preserved() { |
| 805 | + // Bytes piped in on stdin that are not valid UTF-8 must reach stdout unchanged, not replaced with U+FFFD replacement characters |
| 806 | + // This is important for locale compatibility, e.g. text encoded in Latin-1 as in the sample below |
| 807 | + let input: Vec<u8> = b"f\xe9vr.\n".to_vec(); // "févr." encoded as Latin-1 (0xE9 is 'é'); this byte sequence is not valid UTF-8 |
| 808 | + let expected: Vec<u8> = b" 1\tf\xe9vr.\n".to_vec(); |
| 809 | + new_ucmd!() |
| 810 | + .pipe_in(input) |
| 811 | + .succeeds() |
| 812 | + .stdout_is_bytes(expected); |
802 | 813 | } |
803 | 814 |
|
804 | 815 | // Regression tests for issue #9132: repeated flags should use last value |
|
0 commit comments