@@ -1396,4 +1396,388 @@ mod tests {
13961396 // Third paragraph (index 2) has no comment
13971397 assert ! ( ranges. get_comment_ids( 2 ) . is_none( ) ) ;
13981398 }
1399+
1400+ // ==================== Sprint 5: Edge Case Tests ====================
1401+
/// A `<w:comments>` root with no children yields neither text nor a language for id 0.
#[test]
fn test_comments_empty_xml() {
    let empty_part = br#"<?xml version="1.0" encoding="UTF-8"?>
<w:comments xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
</w:comments>"#;

    let parsed = Comments::parse(empty_part);

    // Look both values up first, then check that each lookup came back empty.
    let text = parsed.get(0);
    let language = parsed.get_language(0);
    assert!(text.is_none());
    assert!(language.is_none());
}
1412+
/// A comment whose text lacks a "Language:" prefix is returned verbatim and reports no language.
#[test]
fn test_comments_no_language_prefix() {
    let plain_comment = br#"<?xml version="1.0" encoding="UTF-8"?>
<w:comments xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
<w:comment w:id="0">
<w:p><w:r><w:t>Just a regular comment</w:t></w:r></w:p>
</w:comment>
</w:comments>"#;

    let parsed = Comments::parse(plain_comment);

    let body = parsed.get(0);
    assert_eq!(body, Some("Just a regular comment"));

    // The comment text carries no "Language:" prefix, so no language is expected.
    let language = parsed.get_language(0);
    assert_eq!(language, None);
}
1426+
/// A comment whose `w:id` is not numeric must not show up under id 0.
#[test]
fn test_comments_malformed_id() {
    let bad_id_part = br#"<?xml version="1.0" encoding="UTF-8"?>
<w:comments xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
<w:comment w:id="not_a_number">
<w:p><w:r><w:t>Bad ID</w:t></w:r></w:p>
</w:comment>
</w:comments>"#;

    // Parsing is expected to skip the unparsable id rather than fail.
    let parsed = Comments::parse(bad_id_part);

    let first = parsed.get(0);
    assert!(first.is_none());
}
1440+
/// A document with an empty `<w:body>` has no comment ids attached to block 0.
#[test]
fn test_comment_ranges_empty_document() {
    let empty_body = br#"<?xml version="1.0" encoding="UTF-8"?>
<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
<w:body></w:body>
</w:document>"#;

    let parsed = CommentRanges::parse(empty_body);

    let first_block = parsed.get_comment_ids(0);
    assert!(first_block.is_none());
}
1451+
/// A paragraph / table / paragraph body without comment markers maps no ids to any block.
#[test]
fn test_comment_ranges_with_table() {
    let body_with_table = br#"<?xml version="1.0" encoding="UTF-8"?>
<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
<w:body>
<w:p><w:r><w:t>Before table</w:t></w:r></w:p>
<w:tbl>
<w:tr><w:tc><w:p><w:r><w:t>Cell</w:t></w:r></w:p></w:tc></w:tr>
</w:tbl>
<w:p><w:r><w:t>After table</w:t></w:r></w:p>
</w:body>
</w:document>"#;

    let parsed = CommentRanges::parse(body_with_table);

    // Expected block indices: 0 = paragraph, 1 = table, 2 = paragraph.
    for block_index in 0..3 {
        assert!(parsed.get_comment_ids(block_index).is_none());
    }
}
1471+
/// Core properties with no child elements leave title, author and created unset.
#[test]
fn test_metadata_parse_empty() {
    let empty_props = br#"<?xml version="1.0" encoding="UTF-8"?>
<cp:coreProperties xmlns:cp="http://schemas.openxmlformats.org/package/2006/metadata/core-properties">
</cp:coreProperties>"#;

    let props = DocumentMetadata::parse(empty_props);

    let title_missing = props.title.is_none();
    let author_missing = props.author.is_none();
    let created_missing = props.created.is_none();

    assert!(title_missing);
    assert!(author_missing);
    assert!(created_missing);
}
1483+
/// Only the elements present (`dc:title`, `dc:creator`) are populated; the rest stay `None`.
#[test]
fn test_metadata_parse_partial() {
    let partial_props = br#"<?xml version="1.0" encoding="UTF-8"?>
<cp:coreProperties xmlns:cp="http://schemas.openxmlformats.org/package/2006/metadata/core-properties"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:dcterms="http://purl.org/dc/terms/">
<dc:title>Test Document</dc:title>
<dc:creator>Test Author</dc:creator>
</cp:coreProperties>"#;

    let props = DocumentMetadata::parse(partial_props);

    // Present elements: compare as borrowed strings.
    assert_eq!(props.title.as_deref(), Some("Test Document"));
    assert_eq!(props.author.as_deref(), Some("Test Author"));

    // Elements absent from the XML must remain unset.
    assert!(props.subject.is_none());
    assert!(props.modified.is_none());
}
1500+
/// Default (all-unset) metadata renders to an empty AsciiDoc header.
#[test]
fn test_metadata_to_asciidoc_header_empty() {
    let rendered = DocumentMetadata::default().to_asciidoc_header();
    assert!(rendered.is_empty());
}
1507+
/// Author and modified timestamp surface as `:author:` and `:revdate:` header attributes.
#[test]
fn test_metadata_to_asciidoc_header_with_author() {
    let props = DocumentMetadata {
        author: Some(String::from("John Doe")),
        modified: Some(String::from("2025-01-15T10:30:00Z")),
        ..Default::default()
    };

    let rendered = props.to_asciidoc_header();

    // The revdate check pins only the date portion of the timestamp.
    for expected in &[":author: John Doe", ":revdate: 2025-01-15"] {
        assert!(rendered.contains(*expected));
    }
}
1519+
/// An author that is present but empty must not produce an `:author:` line.
#[test]
fn test_metadata_to_asciidoc_header_empty_author() {
    let props = DocumentMetadata {
        // Set, but with no characters in it.
        author: Some(String::new()),
        ..Default::default()
    };

    let rendered = props.to_asciidoc_header();

    let has_author_line = rendered.contains(":author:");
    assert!(!has_author_line);
}
1530+
/// `with_force_parse` stores exactly the flag it was given, for both `true` and `false`.
#[test]
fn test_extractor_with_force_parse() {
    for &flag in &[true, false] {
        let configured = AsciiDocExtractor::new().with_force_parse(flag);
        assert_eq!(configured.force_parse, flag);
    }
}
1539+
/// Default mappings serialize to a `[styles]` section with no heading/paragraph entries.
#[test]
fn test_style_mappings_to_toml_empty() {
    let serialized = StyleMappings::default().to_toml();

    assert!(serialized.contains("[styles]"));

    // With nothing mapped, none of these keys may appear anywhere in the output.
    for absent_key in &["heading", "paragraph"] {
        assert!(!serialized.contains(*absent_key));
    }
}
1549+
/// With two table styles configured, the serialized TOML names only the first one.
#[test]
fn test_style_mappings_to_toml_with_tables() {
    let mappings = StyleMappings {
        tables: vec!["TableGrid".to_string(), "TableSimple".to_string()],
        ..Default::default()
    };
    let toml = mappings.to_toml();

    // Expected line is a plain key/value pair: no padding inside or after the quoted value.
    assert!(toml.contains("table = \"TableGrid\""));
    // Only the first table should be included, so the second style must not leak through.
    assert!(!toml.contains("TableSimple"));
}
1560+
/// Converting a table that has no rows produces empty output.
#[test]
fn test_convert_empty_table() {
    let rowless = Table {
        rows: Vec::new(),
        style_id: None,
    };

    let rendered = AsciiDocExtractor::new().convert_table(&rowless);

    assert!(rendered.is_empty());
}
1573+
/// With a default stylesheet, only numId 2 is classified as a numbered (ordered) list.
#[test]
fn test_is_numbered_list_heuristic() {
    let extractor = AsciiDocExtractor::new();
    let styles = StyleSheet::default();

    // (numId, expected): 2 is ordered by convention, 1 is unordered,
    // and any other numId defaults to unordered.
    let cases = [(2, true), (1, false), (0, false), (3, false)];

    for &(num_id, expected) in &cases {
        assert_eq!(
            extractor.is_numbered_list(num_id, &styles),
            expected,
            "numId {}",
            num_id
        );
    }
}
1587+
/// A paragraph mixing a regular run with a monospace run is not treated as a code block.
#[test]
fn test_is_code_block_paragraph_mixed_formatting() {
    // Builds an unstyled run; only the text and the monospace flag vary here.
    let run = |text: &str, monospace: bool| Run {
        text: text.to_string(),
        bold: false,
        italic: false,
        monospace,
    };

    let mixed = Paragraph {
        style_id: None,
        numbering: None,
        children: vec![
            ParagraphChild::Run(run("regular ", false)),
            ParagraphChild::Run(run("code\n more", true)),
        ],
    };

    let extractor = AsciiDocExtractor::new();
    let looks_like_code = extractor.is_code_block_paragraph(&mixed);
    assert!(!looks_like_code);
}
1614+
/// A single monospace run that contains newlines is recognised as a code block.
#[test]
fn test_is_code_block_paragraph_all_monospace_with_newline() {
    let snippet = Run {
        text: String::from("fn main() {\n println!(\" Hello\" );\n }"),
        bold: false,
        italic: false,
        monospace: true,
    };

    let code_para = Paragraph {
        style_id: None,
        numbering: None,
        children: vec![ParagraphChild::Run(snippet)],
    };

    let extractor = AsciiDocExtractor::new();
    let looks_like_code = extractor.is_code_block_paragraph(&code_para);
    assert!(looks_like_code);
}
1633+
/// A monospace run without any newline is inline code, not a code block.
#[test]
fn test_is_code_block_paragraph_monospace_no_newline() {
    let inline = Run {
        text: String::from("inline_code"),
        bold: false,
        italic: false,
        monospace: true,
    };

    let inline_para = Paragraph {
        style_id: None,
        numbering: None,
        children: vec![ParagraphChild::Run(inline)],
    };

    let extractor = AsciiDocExtractor::new();
    let looks_like_code = extractor.is_code_block_paragraph(&inline_para);
    assert!(!looks_like_code);
}
1652+
/// Raw paragraph text concatenates plain runs and the runs nested inside a hyperlink, in order.
#[test]
fn test_get_raw_paragraph_text_with_hyperlink() {
    // Builds a run with no bold / italic / monospace formatting.
    let plain = |text: &str| Run {
        text: text.to_string(),
        bold: false,
        italic: false,
        monospace: false,
    };

    // Internal link: an anchor but no relationship id.
    let link = Hyperlink {
        id: None,
        anchor: Some(String::from("target")),
        runs: vec![plain("here")],
    };

    let sentence = Paragraph {
        style_id: None,
        numbering: None,
        children: vec![
            ParagraphChild::Run(plain("Click ")),
            ParagraphChild::Hyperlink(link),
            ParagraphChild::Run(plain(" for more.")),
        ],
    };

    let extractor = AsciiDocExtractor::new();
    let raw = extractor.get_raw_paragraph_text(&sentence);
    assert_eq!(raw, "Click here for more.");
}
1689+
/// Raw paragraph text of a run followed by an inline image with alt text "diagram"
/// is expected to read "See diagram".
#[test]
fn test_get_raw_paragraph_text_with_image() {
    let lead_in = Run {
        text: String::from("See "),
        bold: false,
        italic: false,
        monospace: false,
    };

    let figure = crate::image::Image {
        id: 1,
        rel_id: String::from("rId1"),
        target: String::from("image.png"),
        alt: Some(String::from("diagram")),
        name: None,
        width_emu: None,
        height_emu: None,
        position: crate::image::ImagePosition::Inline,
    };

    let illustrated = Paragraph {
        style_id: None,
        numbering: None,
        children: vec![ParagraphChild::Run(lead_in), ParagraphChild::Image(figure)],
    };

    let extractor = AsciiDocExtractor::new();
    let raw = extractor.get_raw_paragraph_text(&illustrated);
    assert_eq!(raw, "See diagram");
}
1720+
/// A hyperlink with neither a relationship id nor an anchor still emits its visible text.
#[test]
fn test_convert_hyperlink_no_target() {
    let label = Run {
        text: String::from("orphan link"),
        bold: false,
        italic: false,
        monospace: false,
    };

    // Nothing to link to: no id and no anchor.
    let dangling = Hyperlink {
        id: None,
        anchor: None,
        runs: vec![label],
    };

    let extractor = AsciiDocExtractor::new();
    let rendered = extractor.convert_hyperlink(&dangling, None);

    // NOTE(review): the intended shape is reportedly `<<,orphan link>>` (empty anchor);
    // only the presence of the link text is pinned here.
    assert!(rendered.contains("orphan link"));
}
1741+
/// Each `SourceOrigin` variant equals itself and differs from the other.
#[test]
fn test_source_origin_variants() {
    let embedded = SourceOrigin::Embedded;
    let parsed = SourceOrigin::Parsed;

    assert_eq!(embedded, SourceOrigin::Embedded);
    assert_eq!(parsed, SourceOrigin::Parsed);
    assert_ne!(embedded, parsed);
}
1748+
/// With `preserve_formatting` switched off, a bold run is emitted as bare text.
#[test]
fn test_convert_run_preserve_formatting_disabled() {
    let bold_run = Run {
        text: String::from("bold text"),
        bold: true,
        italic: false,
        monospace: false,
    };

    let mut extractor = AsciiDocExtractor::new();
    extractor.preserve_formatting = false;

    let rendered = extractor.convert_run(&bold_run);

    // No formatting markers may be added around the text.
    assert_eq!(rendered, "bold text");
    assert!(!rendered.contains('*'));
}
1766+
/// A run that is both bold and italic carries both the `*` and `_` markers in its output.
#[test]
fn test_convert_run_bold_italic_combined() {
    let emphatic = Run {
        text: String::from("emphasis"),
        bold: true,
        italic: true,
        monospace: false,
    };

    let rendered = AsciiDocExtractor::new().convert_run(&emphatic);

    // '*' marks bold and '_' marks italic; both must be present.
    for &marker in &['*', '_'] {
        assert!(rendered.contains(marker));
    }
}
13991783}
0 commit comments