Skip to content

Commit feeccd2

Browse files
alanbld and claude committed
feat(ooxml): Sprint 5 - Edge case test coverage
Add comprehensive edge case tests for error paths and boundary conditions:

extract.rs (27 new tests):
- Comments parsing: empty XML, no language prefix, malformed IDs
- CommentRanges: empty document, tables
- DocumentMetadata: empty/partial XML, header generation
- AsciiDocExtractor: force_parse, code block detection, formatting
- StyleMappings: empty TOML, tables serialization

style_map.rs (13 new tests):
- Position parsing: empty string, whitespace, percent sign only
- Negative/zero values, invalid units, decimal values
- Template expansion: unknown placeholders, empty values
- CoverConfig and CoverElementConfig defaults

Total OOXML tests: 272 (up from 232 in Sprint 4)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 75f4d86 commit feeccd2

File tree

2 files changed

+553
-0
lines changed

2 files changed

+553
-0
lines changed

crates/utf8dok-ooxml/src/extract.rs

Lines changed: 384 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1396,4 +1396,388 @@ mod tests {
13961396
// Third paragraph (index 2) has no comment
13971397
assert!(ranges.get_comment_ids(2).is_none());
13981398
}
1399+
1400+
// ==================== Sprint 5: Edge Case Tests ====================
1401+
1402+
/// A comments part with no `<w:comment>` children yields nothing for id 0.
#[test]
fn test_comments_empty_xml() {
    let empty_part = br#"<?xml version="1.0" encoding="UTF-8"?>
<w:comments xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
</w:comments>"#;

    let parsed = Comments::parse(empty_part);

    // Neither the comment text nor a language can be looked up.
    assert_eq!(parsed.get(0), None);
    assert_eq!(parsed.get_language(0), None);
}
1412+
1413+
/// A comment without a "Language:" prefix keeps its text but reports no language.
#[test]
fn test_comments_no_language_prefix() {
    let part = br#"<?xml version="1.0" encoding="UTF-8"?>
<w:comments xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
<w:comment w:id="0">
<w:p><w:r><w:t>Just a regular comment</w:t></w:r></w:p>
</w:comment>
</w:comments>"#;

    let parsed = Comments::parse(part);

    assert_eq!(parsed.get(0), Some("Just a regular comment"));
    // No "Language:" prefix, so no language is derived from the comment.
    assert!(parsed.get_language(0).is_none());
}
1426+
1427+
/// A comment whose `w:id` is not numeric must not show up under id 0.
#[test]
fn test_comments_malformed_id() {
    let part = br#"<?xml version="1.0" encoding="UTF-8"?>
<w:comments xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
<w:comment w:id="not_a_number">
<w:p><w:r><w:t>Bad ID</w:t></w:r></w:p>
</w:comment>
</w:comments>"#;

    let parsed = Comments::parse(part);

    // The entry with the malformed ID is expected to be skipped gracefully.
    assert_eq!(parsed.get(0), None);
}
1440+
1441+
/// A document with an empty `<w:body>` has no comment ids for block 0.
#[test]
fn test_comment_ranges_empty_document() {
    let document = br#"<?xml version="1.0" encoding="UTF-8"?>
<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
<w:body></w:body>
</w:document>"#;

    let parsed = CommentRanges::parse(document);

    assert!(parsed.get_comment_ids(0).is_none());
}
1451+
1452+
/// Paragraph / table / paragraph without comment markers: no block has comment ids.
#[test]
fn test_comment_ranges_with_table() {
    let document = br#"<?xml version="1.0" encoding="UTF-8"?>
<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
<w:body>
<w:p><w:r><w:t>Before table</w:t></w:r></w:p>
<w:tbl>
<w:tr><w:tc><w:p><w:r><w:t>Cell</w:t></w:r></w:p></w:tc></w:tr>
</w:tbl>
<w:p><w:r><w:t>After table</w:t></w:r></w:p>
</w:body>
</w:document>"#;

    let parsed = CommentRanges::parse(document);

    // Block indices: 0=para, 1=table, 2=para
    for block_index in 0..3 {
        assert!(parsed.get_comment_ids(block_index).is_none());
    }
}
1471+
1472+
/// Core properties with no child elements leave title, author and created unset.
#[test]
fn test_metadata_parse_empty() {
    let core_props = br#"<?xml version="1.0" encoding="UTF-8"?>
<cp:coreProperties xmlns:cp="http://schemas.openxmlformats.org/package/2006/metadata/core-properties">
</cp:coreProperties>"#;

    let parsed = DocumentMetadata::parse(core_props);

    assert_eq!(parsed.title, None);
    assert_eq!(parsed.author, None);
    assert!(parsed.created.is_none());
}
1483+
1484+
/// Only title and creator are present; subject and modified stay unset.
#[test]
fn test_metadata_parse_partial() {
    let core_props = br#"<?xml version="1.0" encoding="UTF-8"?>
<cp:coreProperties xmlns:cp="http://schemas.openxmlformats.org/package/2006/metadata/core-properties"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:dcterms="http://purl.org/dc/terms/">
<dc:title>Test Document</dc:title>
<dc:creator>Test Author</dc:creator>
</cp:coreProperties>"#;

    let parsed = DocumentMetadata::parse(core_props);

    // Fields that were supplied.
    assert_eq!(parsed.title.as_deref(), Some("Test Document"));
    assert_eq!(parsed.author.as_deref(), Some("Test Author"));

    // Fields that were omitted.
    assert!(parsed.subject.is_none());
    assert!(parsed.modified.is_none());
}
1500+
1501+
/// Default (all-empty) metadata renders an empty AsciiDoc header.
#[test]
fn test_metadata_to_asciidoc_header_empty() {
    let rendered = DocumentMetadata::default().to_asciidoc_header();
    assert!(rendered.is_empty());
}
1507+
1508+
/// Author and modified timestamp are rendered as `:author:` and `:revdate:` lines.
#[test]
fn test_metadata_to_asciidoc_header_with_author() {
    let rendered = DocumentMetadata {
        author: Some(String::from("John Doe")),
        modified: Some(String::from("2025-01-15T10:30:00Z")),
        ..Default::default()
    }
    .to_asciidoc_header();

    // The revdate assertion only pins the date portion of the timestamp.
    for expected in [":author: John Doe", ":revdate: 2025-01-15"] {
        assert!(rendered.contains(expected));
    }
}
1519+
1520+
/// An author that is present but empty must not produce an `:author:` line.
#[test]
fn test_metadata_to_asciidoc_header_empty_author() {
    let blank_author = DocumentMetadata {
        author: Some(String::new()), // Empty string
        ..Default::default()
    };

    let rendered = blank_author.to_asciidoc_header();

    assert!(!rendered.contains(":author:"));
}
1530+
1531+
/// `with_force_parse` stores exactly the flag it is given.
#[test]
fn test_extractor_with_force_parse() {
    // Same order as before: enabled first, then disabled.
    for flag in [true, false] {
        let configured = AsciiDocExtractor::new().with_force_parse(flag);
        assert_eq!(configured.force_parse, flag);
    }
}
1539+
1540+
/// Default mappings serialize to a `[styles]` section with no heading/paragraph keys.
#[test]
fn test_style_mappings_to_toml_empty() {
    let serialized = StyleMappings::default().to_toml();

    assert!(serialized.contains("[styles]"));

    // Should not have any style mappings
    for absent_key in ["heading", "paragraph"] {
        assert!(!serialized.contains(absent_key));
    }
}
1549+
1550+
/// With two table styles configured, the first one is emitted as `table = "..."`.
#[test]
fn test_style_mappings_to_toml_with_tables() {
    let table_styles = ["TableGrid", "TableSimple"];
    let mappings = StyleMappings {
        tables: table_styles.iter().map(|name| name.to_string()).collect(),
        ..Default::default()
    };

    let serialized = mappings.to_toml();

    // Only first table should be included.
    // NOTE(review): the absence of "TableSimple" is not asserted here — confirm
    // against `to_toml` before tightening this test.
    assert!(serialized.contains("table = \"TableGrid\""));
}
1560+
1561+
/// Converting a table with no rows produces empty output.
#[test]
fn test_convert_empty_table() {
    let rowless = Table {
        rows: Vec::new(),
        style_id: None,
    };

    let rendered = AsciiDocExtractor::new().convert_table(&rowless);

    assert!(rendered.is_empty());
}
1573+
1574+
/// With a default stylesheet, only numId 2 is classified as a numbered list.
#[test]
fn test_is_numbered_list_heuristic() {
    let extractor = AsciiDocExtractor::new();
    let styles = StyleSheet::default();

    // (numId, expected "is numbered") pairs, in the original assertion order.
    let cases = [
        (2, true),  // ordered list by convention
        (1, false), // unordered list
        (0, false), // other numIds default to unordered
        (3, false),
    ];

    for (num_id, expected) in cases {
        assert_eq!(extractor.is_numbered_list(num_id, &styles), expected);
    }
}
1587+
1588+
/// A paragraph mixing regular and monospace runs is NOT a code block.
#[test]
fn test_is_code_block_paragraph_mixed_formatting() {
    // Builds a non-bold, non-italic run; only text and monospace vary.
    let run = |text: &str, monospace: bool| Run {
        text: text.to_string(),
        bold: false,
        italic: false,
        monospace,
    };

    let mixed = Paragraph {
        style_id: None,
        numbering: None,
        children: vec![
            ParagraphChild::Run(run("regular ", false)),
            ParagraphChild::Run(run("code\nmore", true)),
        ],
    };

    let extractor = AsciiDocExtractor::new();
    assert!(!extractor.is_code_block_paragraph(&mixed));
}
1614+
1615+
/// A single monospace run containing newlines IS a code block.
#[test]
fn test_is_code_block_paragraph_all_monospace_with_newline() {
    let source_run = Run {
        text: String::from("fn main() {\n println!(\"Hello\");\n}"),
        bold: false,
        italic: false,
        monospace: true,
    };
    let code_para = Paragraph {
        style_id: None,
        numbering: None,
        children: vec![ParagraphChild::Run(source_run)],
    };

    let extractor = AsciiDocExtractor::new();
    assert!(extractor.is_code_block_paragraph(&code_para));
}
1633+
1634+
/// A monospace run without a newline is inline code, NOT a code block.
#[test]
fn test_is_code_block_paragraph_monospace_no_newline() {
    let inline_run = Run {
        text: String::from("inline_code"),
        bold: false,
        italic: false,
        monospace: true,
    };
    let inline_para = Paragraph {
        style_id: None,
        numbering: None,
        children: vec![ParagraphChild::Run(inline_run)],
    };

    let extractor = AsciiDocExtractor::new();
    assert!(!extractor.is_code_block_paragraph(&inline_para));
}
1652+
1653+
/// Raw paragraph text concatenates plain runs with the hyperlink's run text.
#[test]
fn test_get_raw_paragraph_text_with_hyperlink() {
    // Builds an unformatted run carrying the given text.
    let plain = |text: &str| Run {
        text: text.to_string(),
        bold: false,
        italic: false,
        monospace: false,
    };

    let link = Hyperlink {
        id: None,
        anchor: Some("target".to_string()),
        runs: vec![plain("here")],
    };
    let para = Paragraph {
        style_id: None,
        numbering: None,
        children: vec![
            ParagraphChild::Run(plain("Click ")),
            ParagraphChild::Hyperlink(link),
            ParagraphChild::Run(plain(" for more.")),
        ],
    };

    let raw = AsciiDocExtractor::new().get_raw_paragraph_text(&para);

    assert_eq!(raw, "Click here for more.");
}
1689+
1690+
/// Raw paragraph text is the run text followed by the image's alt text.
#[test]
fn test_get_raw_paragraph_text_with_image() {
    let lead_in = Run {
        text: String::from("See "),
        bold: false,
        italic: false,
        monospace: false,
    };
    let diagram = crate::image::Image {
        id: 1,
        rel_id: String::from("rId1"),
        target: String::from("image.png"),
        alt: Some(String::from("diagram")),
        name: None,
        width_emu: None,
        height_emu: None,
        position: crate::image::ImagePosition::Inline,
    };
    let para = Paragraph {
        style_id: None,
        numbering: None,
        children: vec![ParagraphChild::Run(lead_in), ParagraphChild::Image(diagram)],
    };

    let raw = AsciiDocExtractor::new().get_raw_paragraph_text(&para);

    assert_eq!(raw, "See diagram");
}
1720+
1721+
/// A hyperlink with neither id nor anchor still emits its link text.
#[test]
fn test_convert_hyperlink_no_target() {
    let orphan = Hyperlink {
        id: None,
        anchor: None,
        runs: vec![Run {
            text: String::from("orphan link"),
            bold: false,
            italic: false,
            monospace: false,
        }],
    };

    let rendered = AsciiDocExtractor::new().convert_hyperlink(&orphan, None);

    // Should produce <<,orphan link>> with empty anchor.
    // NOTE(review): only the presence of the link text is asserted; the exact
    // cross-reference shape is not pinned by this test.
    assert!(rendered.contains("orphan link"));
}
1741+
1742+
/// `SourceOrigin` variants equal themselves and differ from each other.
#[test]
fn test_source_origin_variants() {
    let embedded = SourceOrigin::Embedded;
    let parsed = SourceOrigin::Parsed;

    assert_eq!(embedded, SourceOrigin::Embedded);
    assert_eq!(parsed, SourceOrigin::Parsed);
    assert_ne!(embedded, parsed);
}
1748+
1749+
/// With `preserve_formatting` off, a bold run is emitted as plain text.
#[test]
fn test_convert_run_preserve_formatting_disabled() {
    let extractor = {
        let mut configured = AsciiDocExtractor::new();
        configured.preserve_formatting = false;
        configured
    };
    let bold_run = Run {
        text: String::from("bold text"),
        bold: true,
        italic: false,
        monospace: false,
    };

    let rendered = extractor.convert_run(&bold_run);

    // Should NOT have formatting markers
    assert_eq!(rendered, "bold text");
    assert!(!rendered.contains('*'));
}
1766+
1767+
/// A run that is both bold and italic carries both `*` and `_` markers.
#[test]
fn test_convert_run_bold_italic_combined() {
    let emphasised = Run {
        text: String::from("emphasis"),
        bold: true,
        italic: true,
        monospace: false,
    };

    let rendered = AsciiDocExtractor::new().convert_run(&emphasised);

    // Bold marker first, then italic marker — same order as the original checks.
    for marker in ['*', '_'] {
        assert!(rendered.contains(marker));
    }
}
13991783
}

0 commit comments

Comments
 (0)