diff --git a/mod_test/models.py b/mod_test/models.py
index 1463a0f3..546eb49e 100644
--- a/mod_test/models.py
+++ b/mod_test/models.py
@@ -444,14 +444,34 @@ def generate_html_diff(self, base_path: str, to_view: bool = True) -> str:
     @staticmethod
     def read_lines(file_name: str) -> List[str]:
         """
-        Try to load a file in different encodings.
+        Try to load a file in different encodings and normalize text content.
 
+        For text files, trailing whitespace is stripped from every line and
+        each line is terminated by a single newline (a final line without a
+        line ending gains one), so files that differ only in line endings or
+        trailing whitespace produce identical lists. Files whose name ends
+        with a known binary extension skip this step and are returned as
+        read in text mode (universal newline translation still applies).
+
         :param file_name: The name to read lines from.
         :type file_name: str
         :return: A list of lines.
         :rtype: List[str]
+        :raises UnicodeDecodeError: If the file decodes as neither UTF-8 nor CP1252.
         """
+        # Extensions of binary formats whose content must NOT be normalized
+        binary_extensions = ('.bin', '.png', '.jpg', '.jpeg', '.ts', '.mp4', '.mkv', '.m4v')
+        is_binary = file_name.lower().endswith(binary_extensions)
+
+        def normalize(lines: List[str]) -> List[str]:
+            if is_binary:
+                return lines
+            # Strip trailing whitespace and ensure consistent line endings
+            return [line.rstrip() + '\n' for line in lines]
+
         try:
-            return open(file_name, encoding='utf8').readlines()
+            with open(file_name, encoding='utf8') as f:
+                return normalize(f.readlines())
         except UnicodeDecodeError:
-            return open(file_name, encoding='cp1252').readlines()
+            with open(file_name, encoding='cp1252') as f:
+                return normalize(f.readlines())
diff --git a/tests/test_normalization.py b/tests/test_normalization.py
new file mode 100644
index 00000000..b36720e4
--- /dev/null
+++ b/tests/test_normalization.py
@@ -0,0 +1,71 @@
+import os
+import tempfile
+import unittest
+
+from mod_test.models import TestResultFile
+
+
+class TestNormalization(unittest.TestCase):
+    """Check the text normalization applied by TestResultFile.read_lines."""
+
+    def setUp(self):
+        self.test_dir = tempfile.TemporaryDirectory()
+
+    def tearDown(self):
+        self.test_dir.cleanup()
+
+    def test_text_normalization(self):
+        # File A: LF, no trailing spaces
+        file_a_path = os.path.join(self.test_dir.name, "file_a.txt")
+        with open(file_a_path, 'wb') as f:
+            f.write(b"line1\nline2\n")
+
+        # File B: CRLF, trailing spaces
+        file_b_path = os.path.join(self.test_dir.name, "file_b.txt")
+        with open(file_b_path, 'wb') as f:
+            f.write(b"line1 \r\nline2\r\n")
+
+        lines_a = TestResultFile.read_lines(file_a_path)
+        lines_b = TestResultFile.read_lines(file_b_path)
+
+        self.assertEqual(lines_a, ["line1\n", "line2\n"])
+        self.assertEqual(lines_b, ["line1\n", "line2\n"])
+        self.assertEqual(lines_a, lines_b)
+
+    def test_missing_final_newline(self):
+        # A final line without a line ending still comes back newline-terminated
+        file_path = os.path.join(self.test_dir.name, "no_eol.txt")
+        with open(file_path, 'wb') as f:
+            f.write(b"line1\nline2")
+
+        lines = TestResultFile.read_lines(file_path)
+
+        self.assertEqual(lines, ["line1\n", "line2\n"])
+
+    def test_cp1252_fallback_is_normalized(self):
+        # A lone 0xE9 byte is invalid UTF-8, which forces the CP1252 fallback
+        file_path = os.path.join(self.test_dir.name, "cp1252.txt")
+        with open(file_path, 'wb') as f:
+            f.write(b"caf\xe9 \r\n")
+
+        lines = TestResultFile.read_lines(file_path)
+
+        self.assertEqual(lines, ["caf\u00e9\n"])
+
+    def test_binary_exemption(self):
+        # Binary extension: read_lines must skip its own normalization
+        file_bin_path = os.path.join(self.test_dir.name, "test.bin")
+        content = b"line1 \r\nline2\r\n"
+        with open(file_bin_path, 'wb') as f:
+            f.write(content)
+
+        lines = TestResultFile.read_lines(file_bin_path)
+
+        # The file is still opened in text mode with the default newline=None
+        # (universal newlines), so Python itself translates "\r\n" to "\n".
+        # Only the rstrip() step is skipped, which keeps the trailing space.
+        self.assertEqual(lines, ["line1 \n", "line2\n"])
+
+
+if __name__ == '__main__':
+    unittest.main()