|
579 | 579 |
|
580 | 580 | end |
581 | 581 |
|
582 | | -#-----------------------------------------------------------------------------# Normalize_newlines |
583 | | -# Helper to make writing tests easier |
584 | | -to_bytes(s) = Vector{UInt8}(s) |
585 | | -from_bytes(b) = String(b) |
586 | | - |
587 | | -@testset "normalize_newlines" begin |
588 | | - # 1. Lone CR -> LF |
589 | | - @test XML.normalize_newlines(to_bytes("a\rb")) == to_bytes("a\nb") |
590 | | - |
591 | | - # 2. CRLF -> LF |
592 | | - @test XML.normalize_newlines(to_bytes("a\r\nb")) == to_bytes("a\nb") |
593 | | - |
594 | | - # 3. CR NEL (0x85) -> LF |
595 | | - @test XML.normalize_newlines(UInt8[0x61, 0x0D, 0x85, 0x62]) == to_bytes("a\nb") |
596 | | - |
597 | | - # 4. NEL (U+0085) UTF-8 form 0xC2 0x85 -> LF |
598 | | - @test XML.normalize_newlines(UInt8[0x61, 0xC2, 0x85, 0x62]) == to_bytes("a\nb") |
599 | | - |
600 | | - # 5. LINE SEPARATOR (U+2028) UTF-8 form 0xE2 0x80 0xA8 -> LF |
601 | | - @test XML.normalize_newlines(UInt8[0x61, 0xE2, 0x80, 0xA8, 0x62]) == to_bytes("a\nb") |
602 | | - |
603 | | - # 6. Mixed newline types in one string |
604 | | - mixed = UInt8[0x61, 0x0D, 0x0A, 0x62, 0xC2, 0x85, 0x63, 0xE2, 0x80, 0xA8, 0x64, 0x0D, 0x65] |
605 | | - expected = to_bytes("a\nb\nc\nd\ne") |
606 | | - @test XML.normalize_newlines(mixed) == expected |
607 | | - |
608 | | - # 7. Consecutive CRs |
609 | | - @test XML.normalize_newlines(to_bytes("a\r\rb")) == to_bytes("a\n\nb") |
610 | | - |
611 | | - # 8. Leading/trailing newlines |
612 | | - @test XML.normalize_newlines(to_bytes("\rabc\r")) == to_bytes("\nabc\n") |
613 | | - |
614 | | - # 9. Empty input |
615 | | - @test XML.normalize_newlines(UInt8[]) == UInt8[] |
616 | | - |
617 | | - # 10. No newline characters |
618 | | - @test XML.normalize_newlines(to_bytes("abcdef")) == to_bytes("abcdef") |
619 | | - |
620 | | - # 11. Unicode safety: multi-byte chars around newlines |
621 | | - s = "α\r\nβ" # α = 0xCE 0xB1, β = 0xCE 0xB2 |
622 | | - @test XML.normalize_newlines(to_bytes(s)) == to_bytes("α\nβ") |
623 | | - |
624 | | - # 12. Boundary case: CR at end of buffer |
625 | | - @test XML.normalize_newlines(UInt8[0x61, 0x0D]) == to_bytes("a\n") |
626 | | - |
627 | | - # 13. Boundary case: 0xC2 at end (incomplete UTF-8 NEL) |
628 | | - @test XML.normalize_newlines(UInt8[0x61, 0xC2]) == UInt8[0x61, 0xC2] |
629 | | - |
630 | | - # 14. Boundary case: 0xE2 0x80 at end (incomplete LINE SEPARATOR) |
631 | | - @test XML.normalize_newlines(UInt8[0x61, 0xE2, 0x80]) == UInt8[0x61, 0xE2, 0x80] |
632 | | -end |
633 | | - |
634 | 582 | #-----------------------------------------------------------------------------# roundtrip |
635 | 583 | @testset "read/write/read roundtrip" begin |
636 | 584 | for path in all_files |
|
0 commit comments