Skip to content

Commit 50aa9aa

Browse files
authored
Normalize_newlines
1 parent 3e276ca commit 50aa9aa

File tree

1 file changed

+96
-42
lines changed

1 file changed

+96
-42
lines changed

test/runtests.jl

Lines changed: 96 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,6 @@ end
116116
@test String(doc[end]) == "</catalog>"
117117

118118
@testset "next and prev" begin
119-
@test XML.prev(doc[1]) == data # can't use === here because prev returns a copy of ctx
120119
@test XML.prev(doc[1]) == data # can't use === here because prev returns a copy of ctx
121120
@test prev(data) === nothing
122121
@test XML.next(doc[end]) === nothing
@@ -429,19 +428,18 @@ end
429428
@test XML.value(d2[1][6][1]) == " after default gap "
430429
@test XML.value(d2[1][7]) == "\n"
431430
end
432-
433-
# @testset "XML whitespace vs Unicode whitespace" begin
434-
# nbsp = "\u00A0"
435-
# s = """<root>
436-
# <a> x\t\n </a>
437-
# <b>$(nbsp) y $(nbsp)</b>
438-
# <c xml:space="default">$(nbsp) z $(nbsp)</c>
439-
# </root>"""
440-
# d = XML.parse(XML.Node, s)
441-
# @test XML.value(d[1][1][1]) == "x"
442-
# @test XML.value(d[1][2][1]) == "$(nbsp) y $(nbsp)"
443-
# @test XML.value(d[1][3][1]) == "$(nbsp) z $(nbsp)"
444-
# end
431+
@testset "XML whitespace vs Unicode whitespace" begin
432+
nbsp = "\u00A0"
433+
s = """<root>
434+
<a> x\t\n </a>
435+
<b>$(nbsp) y $(nbsp)</b>
436+
<c xml:space="default">$(nbsp) z $(nbsp)</c>
437+
</root>"""
438+
d = XML.parse(XML.Node, s)
439+
@test XML.value(d[1][1][1]) == "x"
440+
@test XML.value(d[1][2][1]) == "$(nbsp) y $(nbsp)"
441+
@test XML.value(d[1][3][1]) == "$(nbsp) z $(nbsp)"
442+
end
445443

446444
@testset "CDATA/Comment/PI boundaries" begin
447445
s = """<root>
@@ -486,17 +484,21 @@ end
486484
@test XML.value(d[1][1]) == "a"
487485
end
488486

489-
# @testset "entities expanding to whitespace" begin
490-
# s = """<root>
491-
# <a> &#x20; a &#x0A; </a>
492-
# <b xml:space="preserve">&#x20; b &#x0A;</b>
493-
# <c>&#xA0;c&#xA0;</c>
494-
# </root>"""
495-
# d = XML.parse(XML.Node, s)
496-
# @test XML.value(d[1][1][1]) == "a"
497-
# @test XML.value(d[1][2][1]) == " b \n"
498-
# @test XML.value(d[1][3][1]) == "\u00A0c\u00A0"
499-
# end
487+
@testset "entities expanding to whitespace" begin
488+
chr1="\u0020"
489+
chr2="\u000A"
490+
chr3="\u00A0"
491+
492+
s = """<root>
493+
<a> $(chr1) a $(chr2) </a>
494+
<b xml:space="preserve">$(chr1) b $(chr2)</b>
495+
<c>$(chr3)c$(chr3)</c>
496+
</root>"""
497+
d = XML.parse(XML.Node, s)
498+
@test XML.value(d[1][1][1]) == "a"
499+
@test XML.value(d[1][2][1]) == " b \n"
500+
@test XML.value(d[1][3][1]) == "$(chr3)c$(chr3)"
501+
end
500502

501503
@testset "invalid values and placement" begin
502504
s_bad = """<root><x xml:space="weird"> t </x></root>"""
@@ -535,23 +537,22 @@ end
535537
@test reverse(back)[2:end] == toks[1:end-1]
536538
end
537539

538-
# @testset "write/read roundtrip extremes" begin
539-
# XML.write doesn't respect xml:space="preserve" in the current implementation so roundtrip isn't possible.
540-
# xml = """<root>
541-
# <p xml:space="preserve"> </p>
542-
# <q> </q>
543-
# <r xml:space="default"> r </r>
544-
# <s xml:space="preserve"> pre <t/> post </s>
545-
# </root>"""
546-
# n = XML.parse(XML.Node, xml)
547-
# io = IOBuffer(); XML.write(io, n)
548-
# n2 = XML.parse(XML.Node, String(take!(io)))
549-
# @test n == n2
550-
# @test XML.write(n2[1][1]) == "<p xml:space=\"preserve\"> </p>"
551-
# @test XML.write(n2[1][2]) == "<q/>"
552-
# @test XML.value(n2[1][3][1]) == "r"
553-
# @test XML.write(n2[1][4]) == "<s xml:space=\"preserve\"> pre <t/> post </s>"
554-
# end
540+
@testset "write/read roundtrip extremes" begin
541+
xml = """<root>
542+
<p xml:space="preserve"> </p>
543+
<q> </q>
544+
<r xml:space="default"> r </r>
545+
<s xml:space="preserve"> pre <t/> post </s>
546+
</root>"""
547+
n = XML.parse(XML.Node, xml)
548+
io = IOBuffer(); XML.write(io, n)
549+
n2 = XML.parse(XML.Node, String(take!(io)))
550+
@test n == n2
551+
@test XML.write(n2[1][1]) == "<p xml:space=\"preserve\"> </p>"
552+
@test XML.write(n2[1][2]) == "<q/>"
553+
@test XML.value(n2[1][3][1]) == "r"
554+
@test XML.write(n2[1][4]) == "<s xml:space=\"preserve\"> pre <t/> post </s>"
555+
end
555556

556557
@testset "self-closing/empty/whitespace-only children" begin
557558
s = """<root>
@@ -578,6 +579,58 @@ end
578579

579580
end
580581

582+
#-----------------------------------------------------------------------------# Normalize_newlines
583+
# Helpers to make writing tests easier: convert between strings and raw byte vectors
584+
to_bytes(s) = Vector{UInt8}(s)
585+
from_bytes(b) = String(b)
586+
587+
@testset "normalize_newlines" begin
588+
# 1. Lone CR -> LF
589+
@test XML.normalize_newlines(to_bytes("a\rb")) == to_bytes("a\nb")
590+
591+
# 2. CRLF -> LF
592+
@test XML.normalize_newlines(to_bytes("a\r\nb")) == to_bytes("a\nb")
593+
594+
# 3. CR followed by a raw NEL byte (0x0D 0x85) -> single LF
595+
@test XML.normalize_newlines(UInt8[0x61, 0x0D, 0x85, 0x62]) == to_bytes("a\nb")
596+
597+
# 4. NEL (U+0085) UTF-8 form 0xC2 0x85 -> LF
598+
@test XML.normalize_newlines(UInt8[0x61, 0xC2, 0x85, 0x62]) == to_bytes("a\nb")
599+
600+
# 5. LINE SEPARATOR (U+2028) UTF-8 form 0xE2 0x80 0xA8 -> LF
601+
@test XML.normalize_newlines(UInt8[0x61, 0xE2, 0x80, 0xA8, 0x62]) == to_bytes("a\nb")
602+
603+
# 6. Mixed newline types in one string
604+
mixed = UInt8[0x61, 0x0D, 0x0A, 0x62, 0xC2, 0x85, 0x63, 0xE2, 0x80, 0xA8, 0x64, 0x0D, 0x65]
605+
expected = to_bytes("a\nb\nc\nd\ne")
606+
@test XML.normalize_newlines(mixed) == expected
607+
608+
# 7. Consecutive CRs
609+
@test XML.normalize_newlines(to_bytes("a\r\rb")) == to_bytes("a\n\nb")
610+
611+
# 8. Leading/trailing newlines
612+
@test XML.normalize_newlines(to_bytes("\rabc\r")) == to_bytes("\nabc\n")
613+
614+
# 9. Empty input
615+
@test XML.normalize_newlines(UInt8[]) == UInt8[]
616+
617+
# 10. No newline characters
618+
@test XML.normalize_newlines(to_bytes("abcdef")) == to_bytes("abcdef")
619+
620+
# 11. Unicode safety: multi-byte chars around newlines
621+
s = "α\r\nβ" # α = 0xCE 0xB1, β = 0xCE 0xB2
622+
@test XML.normalize_newlines(to_bytes(s)) == to_bytes("α\nβ")
623+
624+
# 12. Boundary case: CR at end of buffer
625+
@test XML.normalize_newlines(UInt8[0x61, 0x0D]) == to_bytes("a\n")
626+
627+
# 13. Boundary case: 0xC2 at end (incomplete UTF-8 NEL) -> left unchanged
628+
@test XML.normalize_newlines(UInt8[0x61, 0xC2]) == UInt8[0x61, 0xC2]
629+
630+
# 14. Boundary case: 0xE2 0x80 at end (incomplete LINE SEPARATOR) -> left unchanged
631+
@test XML.normalize_newlines(UInt8[0x61, 0xE2, 0x80]) == UInt8[0x61, 0xE2, 0x80]
632+
end
633+
581634
#-----------------------------------------------------------------------------# roundtrip
582635
@testset "read/write/read roundtrip" begin
583636
for path in all_files
@@ -642,3 +695,4 @@ end
642695
xyz = XML.Element("point"; kw...)
643696
@test collect(keys(attributes(xyz))) == string.(collect('a':'z'))
644697
end
698+

0 commit comments

Comments
 (0)