|
116 | 116 | @test String(doc[end]) == "</catalog>" |
117 | 117 |
|
118 | 118 | @testset "next and prev" begin |
119 | | - @test XML.prev(doc[1]) == data # can't use === here because prev returns a copy of ctx |
120 | 119 | @test XML.prev(doc[1]) == data # can't use === here because prev returns a copy of ctx |
121 | 120 | @test prev(data) === nothing |
122 | 121 | @test XML.next(doc[end]) === nothing |
@@ -429,19 +428,18 @@ end |
429 | 428 | @test XML.value(d2[1][6][1]) == " after default gap " |
430 | 429 | @test XML.value(d2[1][7]) == "\n" |
431 | 430 | end |
432 | | - |
433 | | -# @testset "XML whitespace vs Unicode whitespace" begin |
434 | | -# nbsp = "\u00A0" |
435 | | -# s = """<root> |
436 | | -# <a> x\t\n </a> |
437 | | -# <b>$(nbsp) y $(nbsp)</b> |
438 | | -# <c xml:space="default">$(nbsp) z $(nbsp)</c> |
439 | | -# </root>""" |
440 | | -# d = XML.parse(XML.Node, s) |
441 | | -# @test XML.value(d[1][1][1]) == "x" |
442 | | -# @test XML.value(d[1][2][1]) == "$(nbsp) y $(nbsp)" |
443 | | -# @test XML.value(d[1][3][1]) == "$(nbsp) z $(nbsp)" |
444 | | -# end |
| 431 | + @testset "XML whitespace vs Unicode whitespace" begin | # Contrasts space/tab/newline padding with U+00A0 padding in parsed text values.
| 432 | + nbsp = "\u00A0" | # U+00A0 NO-BREAK SPACE
| 433 | + s = """<root> |
| 434 | + <a> x\t\n </a> |
| 435 | + <b>$(nbsp) y $(nbsp)</b> |
| 436 | + <c xml:space="default">$(nbsp) z $(nbsp)</c> |
| 437 | + </root>""" |
| 438 | + d = XML.parse(XML.Node, s) |
| 439 | + @test XML.value(d[1][1][1]) == "x" | # surrounding space, tab and newline are expected to be stripped
| 440 | + @test XML.value(d[1][2][1]) == "$(nbsp) y $(nbsp)" | # NBSP padding (and the spaces inside it) is expected to be kept
| 441 | + @test XML.value(d[1][3][1]) == "$(nbsp) z $(nbsp)" | # same expectation with an explicit xml:space="default"
| 442 | + end |
445 | 443 |
|
446 | 444 | @testset "CDATA/Comment/PI boundaries" begin |
447 | 445 | s = """<root> |
@@ -486,17 +484,21 @@ end |
486 | 484 | @test XML.value(d[1][1]) == "a" |
487 | 485 | end |
488 | 486 |
|
489 | | -# @testset "entities expanding to whitespace" begin |
490 | | -# s = """<root> |
491 | | -# <a>   a 
 </a> |
492 | | -# <b xml:space="preserve">  b 
</b> |
493 | | -# <c> c </c> |
494 | | -# </root>""" |
495 | | -# d = XML.parse(XML.Node, s) |
496 | | -# @test XML.value(d[1][1][1]) == "a" |
497 | | -# @test XML.value(d[1][2][1]) == " b \n" |
498 | | -# @test XML.value(d[1][3][1]) == "\u00A0c\u00A0" |
499 | | -# end |
| 487 | + @testset "entities expanding to whitespace" begin | # NOTE(review): the document below is built by interpolating literal characters, not character references such as &#x20; -- confirm the testset name still matches what is exercised.
| 488 | + chr1="\u0020" | # U+0020 SPACE
| 489 | + chr2="\u000A" | # U+000A LINE FEED
| 490 | + chr3="\u00A0" | # U+00A0 NO-BREAK SPACE
| 491 | + |
| 492 | + s = """<root> |
| 493 | + <a> $(chr1) a $(chr2) </a> |
| 494 | + <b xml:space="preserve">$(chr1) b $(chr2)</b> |
| 495 | + <c>$(chr3)c$(chr3)</c> |
| 496 | + </root>""" |
| 497 | + d = XML.parse(XML.Node, s) |
| 498 | + @test XML.value(d[1][1][1]) == "a" | # space and line-feed padding is expected to be stripped
| 499 | + @test XML.value(d[1][2][1]) == " b \n" | # under xml:space="preserve" the space and line feed are expected to be kept
| 500 | + @test XML.value(d[1][3][1]) == "$(chr3)c$(chr3)" | # NBSP is expected to be kept
| 501 | + end |
500 | 502 |
|
501 | 503 | @testset "invalid values and placement" begin |
502 | 504 | s_bad = """<root><x xml:space="weird"> t </x></root>""" |
@@ -535,23 +537,22 @@ end |
535 | 537 | @test reverse(back)[2:end] == toks[1:end-1] |
536 | 538 | end |
537 | 539 |
|
538 | | -# @testset "write/read roundtrip extremes" begin |
539 | | - # XML.write doesn't respect xml:space="preserve" in the current implementation so roundtrip isn't possible. |
540 | | -# xml = """<root> |
541 | | -# <p xml:space="preserve"> </p> |
542 | | -# <q> </q> |
543 | | -# <r xml:space="default"> r </r> |
544 | | -# <s xml:space="preserve"> pre <t/> post </s> |
545 | | -# </root>""" |
546 | | -# n = XML.parse(XML.Node, xml) |
547 | | -# io = IOBuffer(); XML.write(io, n) |
548 | | -# n2 = XML.parse(XML.Node, String(take!(io))) |
549 | | -# @test n == n2 |
550 | | -# @test XML.write(n2[1][1]) == "<p xml:space=\"preserve\"> </p>" |
551 | | -# @test XML.write(n2[1][2]) == "<q/>" |
552 | | -# @test XML.value(n2[1][3][1]) == "r" |
553 | | -# @test XML.write(n2[1][4]) == "<s xml:space=\"preserve\"> pre <t/> post </s>" |
554 | | -# end |
| 540 | + @testset "write/read roundtrip extremes" begin | # Parses a document, writes it, parses the output again, then checks the re-parsed tree.
| 541 | + xml = """<root> |
| 542 | + <p xml:space="preserve"> </p> |
| 543 | + <q> </q> |
| 544 | + <r xml:space="default"> r </r> |
| 545 | + <s xml:space="preserve"> pre <t/> post </s> |
| 546 | + </root>""" |
| 547 | + n = XML.parse(XML.Node, xml) |
| 548 | + io = IOBuffer(); XML.write(io, n) |
| 549 | + n2 = XML.parse(XML.Node, String(take!(io))) |
| 550 | + @test n == n2 | # parse -> write -> parse is expected to give an equal tree
| 551 | + @test XML.write(n2[1][1]) == "<p xml:space=\"preserve\"> </p>" | # whitespace-only content under "preserve" is expected to survive
| 552 | + @test XML.write(n2[1][2]) == "<q/>" | # whitespace-only <q> is expected to be written as an empty element
| 553 | + @test XML.value(n2[1][3][1]) == "r" | # padding around "r" is expected to be stripped under "default"
| 554 | + @test XML.write(n2[1][4]) == "<s xml:space=\"preserve\"> pre <t/> post </s>" | # text on both sides of <t/> is expected to keep its padding
| 555 | + end |
555 | 556 |
|
556 | 557 | @testset "self-closing/empty/whitespace-only children" begin |
557 | 558 | s = """<root> |
|
578 | 579 |
|
579 | 580 | end |
580 | 581 |
|
| 582 | +#-----------------------------------------------------------------------------# normalize_newlines |
| 583 | +# Helpers for the byte-level tests: to_bytes builds a Vector{UInt8} from s, from_bytes builds a String from b |
| 584 | +to_bytes(s) = Vector{UInt8}(s) |
| 585 | +from_bytes(b) = String(b) | # NOTE(review): no caller in the visible portion of the file -- confirm it is used elsewhere
| 586 | + |
| 587 | +@testset "normalize_newlines" begin | # Byte-level expectations for XML.normalize_newlines on Vector{UInt8} input.
| 588 | + # 1. Lone CR -> LF |
| 589 | + @test XML.normalize_newlines(to_bytes("a\rb")) == to_bytes("a\nb") |
| 590 | + |
| 591 | + # 2. CRLF -> single LF |
| 592 | + @test XML.normalize_newlines(to_bytes("a\r\nb")) == to_bytes("a\nb") |
| 593 | + |
| 594 | + # 3. CR followed by a bare 0x85 byte -> single LF (note: raw 0x85, not the UTF-8 NEL sequence 0xC2 0x85 of case 4) |
| 595 | + @test XML.normalize_newlines(UInt8[0x61, 0x0D, 0x85, 0x62]) == to_bytes("a\nb") |
| 596 | + |
| 597 | + # 4. NEL (U+0085) UTF-8 form 0xC2 0x85 -> LF |
| 598 | + @test XML.normalize_newlines(UInt8[0x61, 0xC2, 0x85, 0x62]) == to_bytes("a\nb") |
| 599 | + |
| 600 | + # 5. LINE SEPARATOR (U+2028) UTF-8 form 0xE2 0x80 0xA8 -> LF |
| 601 | + @test XML.normalize_newlines(UInt8[0x61, 0xE2, 0x80, 0xA8, 0x62]) == to_bytes("a\nb") |
| 602 | + |
| 603 | + # 6. Mixed newline types in one input: CRLF, UTF-8 NEL, LINE SEPARATOR, then a lone CR |
| 604 | + mixed = UInt8[0x61, 0x0D, 0x0A, 0x62, 0xC2, 0x85, 0x63, 0xE2, 0x80, 0xA8, 0x64, 0x0D, 0x65] |
| 605 | + expected = to_bytes("a\nb\nc\nd\ne") |
| 606 | + @test XML.normalize_newlines(mixed) == expected |
| 607 | + |
| 608 | + # 7. Consecutive CRs: each one becomes its own LF |
| 609 | + @test XML.normalize_newlines(to_bytes("a\r\rb")) == to_bytes("a\n\nb") |
| 610 | + |
| 611 | + # 8. Leading/trailing CRs are converted as well |
| 612 | + @test XML.normalize_newlines(to_bytes("\rabc\r")) == to_bytes("\nabc\n") |
| 613 | + |
| 614 | + # 9. Empty input -> empty output |
| 615 | + @test XML.normalize_newlines(UInt8[]) == UInt8[] |
| 616 | + |
| 617 | + # 10. No newline characters: input is expected back unchanged |
| 618 | + @test XML.normalize_newlines(to_bytes("abcdef")) == to_bytes("abcdef") |
| 619 | + |
| 620 | + # 11. Unicode safety: multi-byte chars around a CRLF stay intact |
| 621 | + s = "α\r\nβ" # α = 0xCE 0xB1, β = 0xCE 0xB2 |
| 622 | + @test XML.normalize_newlines(to_bytes(s)) == to_bytes("α\nβ") |
| 623 | + |
| 624 | + # 12. Boundary case: CR as the last byte of the buffer -> LF |
| 625 | + @test XML.normalize_newlines(UInt8[0x61, 0x0D]) == to_bytes("a\n") |
| 626 | + |
| 627 | + # 13. Boundary case: 0xC2 at end (incomplete UTF-8 NEL) is expected to pass through unchanged |
| 628 | + @test XML.normalize_newlines(UInt8[0x61, 0xC2]) == UInt8[0x61, 0xC2] |
| 629 | + |
| 630 | + # 14. Boundary case: 0xE2 0x80 at end (incomplete LINE SEPARATOR) is expected to pass through unchanged |
| 631 | + @test XML.normalize_newlines(UInt8[0x61, 0xE2, 0x80]) == UInt8[0x61, 0xE2, 0x80] |
| 632 | +end |
| 633 | + |
581 | 634 | #-----------------------------------------------------------------------------# roundtrip |
582 | 635 | @testset "read/write/read roundtrip" begin |
583 | 636 | for path in all_files |
|
642 | 695 | xyz = XML.Element("point"; kw...) |
643 | 696 | @test collect(keys(attributes(xyz))) == string.(collect('a':'z')) |
644 | 697 | end |
| 698 | + |
0 commit comments