Skip to content

Commit be1ab7d

Browse files
alanbld and claude committed
test(ooxml): Sprint 7 - custom style detection + context/paragraph tests
- Fix TODO: detect custom styles via w:customStyle="1" attribute
- Added is_custom field to StyleBuilder for parsing
- Set builtin: false for styles with customStyle="1"
- 3 tests for custom style detection
- Add ConversionContext tests (8): style/relationship resolution, heading level fallback, hyperlink context, section breaks
- Add Paragraph::is_empty edge cases (10): bookmark-only, empty hyperlinks, mixed children, images, whitespace handling
- Add Paragraph iterators: runs() and images() helper tests

232 tests total (up from 213)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 1f2f7e3 commit be1ab7d

File tree

3 files changed

+461
-1
lines changed

3 files changed

+461
-1
lines changed

crates/utf8dok-ooxml/src/conversion.rs

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -750,4 +750,148 @@ mod tests {
750750
panic!("Expected Table block");
751751
}
752752
}
753+
754+
// ==================== Sprint 7: ConversionContext Tests ====================
755+
756+
/// Checks that a context built with `ConversionContext::with_styles` resolves heading
/// levels from the parsed stylesheet: the two custom paragraph styles declare
/// `w:outlineLvl` 0 and 1 and are expected to report heading levels 1 and 2, while a
/// style ID that is not in the stylesheet is expected to yield `None`.
#[test]
757+
fn test_context_with_styles() {
758+
use crate::styles::StyleSheet;
759+
760+
let xml = br#"<?xml version="1.0" encoding="UTF-8"?>
761+
<w:styles xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
762+
<w:style w:type="paragraph" w:styleId="CustomHeading1" w:customStyle="1">
763+
<w:name w:val="Custom Heading 1"/>
764+
<w:pPr><w:outlineLvl w:val="0"/></w:pPr>
765+
</w:style>
766+
<w:style w:type="paragraph" w:styleId="CustomHeading2" w:customStyle="1">
767+
<w:name w:val="Custom Heading 2"/>
768+
<w:pPr><w:outlineLvl w:val="1"/></w:pPr>
769+
</w:style>
770+
</w:styles>"#;
771+
772+
let styles = StyleSheet::parse(xml).unwrap();
773+
let ctx = ConversionContext::with_styles(&styles);
774+
775+
// Should detect heading level from outline level in stylesheet.
// The fixture's outline levels are 0 and 1; the expected heading levels are 1 and 2.
776+
assert_eq!(ctx.heading_level("CustomHeading1"), Some(1));
777+
assert_eq!(ctx.heading_level("CustomHeading2"), Some(2));
778+
assert_eq!(ctx.heading_level("NonExistent"), None);
779+
}
780+
781+
/// Checks that `ConversionContext::with_styles_and_rels` exposes both the stylesheet and
/// the relationships it was given, and that relationship `rId1` resolves to the external
/// hyperlink target declared in the fixture.
#[test]
782+
fn test_context_with_styles_and_rels() {
783+
use crate::relationships::Relationships;
784+
use crate::styles::StyleSheet;
785+
786+
let styles_xml = br#"<?xml version="1.0" encoding="UTF-8"?>
787+
<w:styles xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
788+
<w:style w:type="paragraph" w:styleId="Normal">
789+
<w:name w:val="Normal"/>
790+
</w:style>
791+
</w:styles>"#;
792+
793+
let rels_xml = br#"<?xml version="1.0" encoding="UTF-8"?>
794+
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
795+
<Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink" Target="https://example.com" TargetMode="External"/>
796+
</Relationships>"#;
797+
798+
let styles = StyleSheet::parse(styles_xml).unwrap();
799+
let rels = Relationships::parse(rels_xml).unwrap();
800+
let ctx = ConversionContext::with_styles_and_rels(&styles, &rels);
801+
802+
// Verify both styles and relationships are accessible
803+
assert!(ctx.styles.is_some());
804+
assert!(ctx.relationships.is_some());
805+
// `relationships` was asserted to be `Some` on the previous line, so the unwrap is safe here.
assert_eq!(ctx.relationships.unwrap().get("rId1"), Some("https://example.com"));
806+
}
807+
808+
/// Checks the style-ID fallback of `heading_level` on a context created with
/// `ConversionContext::new()`: IDs of the form `Heading<N>` (also with a lowercase `h`)
/// are expected to yield `Some(N)`, and IDs without a numeric heading suffix `None`.
#[test]
809+
fn test_heading_level_fallback() {
810+
// Without styles, should fall back to parsing style ID
811+
let ctx = ConversionContext::new();
812+
813+
assert_eq!(ctx.heading_level("Heading1"), Some(1));
814+
assert_eq!(ctx.heading_level("Heading2"), Some(2));
815+
assert_eq!(ctx.heading_level("Heading9"), Some(9));
816+
assert_eq!(ctx.heading_level("heading3"), Some(3)); // lowercase
817+
assert_eq!(ctx.heading_level("Normal"), None);
818+
assert_eq!(ctx.heading_level("HeadingX"), None); // Not a number
819+
}
820+
821+
/// Checks that the stylesheet wins over the style-ID fallback: the style ID `Heading1`
/// would suggest level 1, but the style declares `w:outlineLvl` 2, so `heading_level`
/// is expected to return 3.
#[test]
822+
fn test_heading_level_stylesheet_takes_precedence() {
823+
use crate::styles::StyleSheet;
824+
825+
// Style whose w:outlineLvl is 2 (expected heading level 3), even though its ID suggests level 1
826+
let xml = br#"<?xml version="1.0" encoding="UTF-8"?>
827+
<w:styles xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
828+
<w:style w:type="paragraph" w:styleId="Heading1">
829+
<w:name w:val="heading 1"/>
830+
<w:pPr><w:outlineLvl w:val="2"/></w:pPr>
831+
</w:style>
832+
</w:styles>"#;
833+
834+
let styles = StyleSheet::parse(xml).unwrap();
835+
let ctx = ConversionContext::with_styles(&styles);
836+
837+
// Should use the level derived from the outline level (outlineLvl 2 -> heading level 3),
// not the ID-based level (1)
838+
assert_eq!(ctx.heading_level("Heading1"), Some(3));
839+
}
840+
841+
/// Checks that `ConversionContext::default()` carries neither a stylesheet nor
/// relationships.
#[test]
842+
fn test_context_default() {
843+
let ctx = ConversionContext::default();
844+
assert!(ctx.styles.is_none());
845+
assert!(ctx.relationships.is_none());
846+
}
847+
848+
/// Checks that `Hyperlink::to_ast` resolves a relationship ID through the context:
/// a hyperlink with `id` `rId5` and a context whose relationships map `rId5` to
/// `https://rust-lang.org` is expected to produce an `Inline::Link` with that URL.
#[test]
849+
fn test_hyperlink_with_context() {
850+
use crate::relationships::Relationships;
851+
852+
let rels_xml = br#"<?xml version="1.0" encoding="UTF-8"?>
853+
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
854+
<Relationship Id="rId5" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink" Target="https://rust-lang.org" TargetMode="External"/>
855+
</Relationships>"#;
856+
857+
let rels = Relationships::parse(rels_xml).unwrap();
858+
// Context with relationships only; no stylesheet is supplied in this test.
let ctx = ConversionContext {
859+
styles: None,
860+
relationships: Some(&rels),
861+
};
862+
863+
// Create hyperlink with relationship ID
864+
let hyperlink = Hyperlink {
865+
id: Some("rId5".to_string()),
866+
anchor: None,
867+
runs: vec![Run {
868+
text: "Rust".to_string(),
869+
bold: false,
870+
italic: false,
871+
monospace: false,
872+
}],
873+
};
874+
875+
let inline = hyperlink.to_ast(&ctx);
876+
877+
// The URL must be the relationship target, looked up by the hyperlink's `id`.
if let Inline::Link(link) = inline {
878+
assert_eq!(link.url, "https://rust-lang.org");
879+
} else {
880+
panic!("Expected Link inline");
881+
}
882+
}
883+
884+
/// Checks that `convert_document` turns a document containing a single
/// `Block::SectionBreak` into exactly one `AstBlock::Break(AstBreakType::Section)`.
#[test]
885+
fn test_section_break_conversion_explicit() {
886+
let doc = Document {
887+
blocks: vec![Block::SectionBreak],
888+
};
889+
890+
let ast_doc = convert_document(&doc);
891+
assert_eq!(ast_doc.blocks.len(), 1);
892+
assert!(matches!(
893+
&ast_doc.blocks[0],
894+
AstBlock::Break(AstBreakType::Section)
895+
));
896+
}
753897
}

crates/utf8dok-ooxml/src/document.rs

Lines changed: 224 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -945,4 +945,228 @@ mod tests {
945945
panic!("Expected paragraph");
946946
}
947947
}
948+
949+
// ==================== Sprint 7: Paragraph::is_empty Tests ====================
950+
951+
/// Checks that a paragraph with no children at all is reported as empty.
#[test]
952+
fn test_paragraph_is_empty_with_no_children() {
953+
let para = Paragraph {
954+
style_id: None,
955+
children: vec![],
956+
numbering: None,
957+
};
958+
assert!(para.is_empty());
959+
}
960+
961+
/// Checks that a paragraph whose only child is a run containing just whitespace
/// (space, tab, newline) is reported as empty.
#[test]
962+
fn test_paragraph_is_empty_with_whitespace_only() {
963+
let para = Paragraph {
964+
style_id: None,
965+
children: vec![ParagraphChild::Run(Run {
966+
text: " \t\n ".to_string(),
967+
bold: false,
968+
italic: false,
969+
monospace: false,
970+
})],
971+
numbering: None,
972+
};
973+
assert!(para.is_empty());
974+
}
975+
976+
/// Checks that a paragraph whose only child is an inline image is NOT reported as empty,
/// even though it contains no text.
#[test]
977+
fn test_paragraph_is_empty_with_image() {
978+
use crate::image::{Image, ImagePosition};
979+
980+
let para = Paragraph {
981+
style_id: None,
982+
children: vec![ParagraphChild::Image(Image {
983+
id: 1,
984+
rel_id: "rId1".to_string(),
985+
target: "media/image.png".to_string(),
986+
alt: None,
987+
name: None,
988+
width_emu: None,
989+
height_emu: None,
990+
position: ImagePosition::Inline,
991+
})],
992+
numbering: None,
993+
};
994+
// Images are NOT empty
995+
assert!(!para.is_empty());
996+
}
997+
998+
/// Checks that a paragraph containing only a bookmark is reported as empty.
#[test]
1000+
fn test_paragraph_is_empty_with_bookmark_only() {
1001+
let para = Paragraph {
1002+
style_id: None,
1003+
children: vec![ParagraphChild::Bookmark(Bookmark {
1004+
name: "_Toc123".to_string(),
1005+
})],
1006+
numbering: None,
1007+
};
1008+
// Bookmarks are considered empty (no visible content)
1009+
assert!(para.is_empty());
1010+
}
1010+
1011+
/// Checks that a paragraph whose only child is a hyperlink with no visible text is
/// reported as empty. Note the hyperlink here is not run-less: it holds a single run
/// whose text is whitespace only.
#[test]
1012+
fn test_paragraph_is_empty_with_empty_hyperlink() {
1013+
let para = Paragraph {
1014+
style_id: None,
1015+
children: vec![ParagraphChild::Hyperlink(Hyperlink {
1016+
id: Some("rId1".to_string()),
1017+
anchor: None,
1018+
runs: vec![Run {
1019+
text: " ".to_string(), // Whitespace only
1020+
bold: false,
1021+
italic: false,
1022+
monospace: false,
1023+
}],
1024+
})],
1025+
numbering: None,
1026+
};
1027+
assert!(para.is_empty());
1028+
}
1029+
1030+
/// Checks that an image makes a paragraph non-empty even when the paragraph's other
/// child is a hyperlink with no runs.
#[test]
1031+
fn test_paragraph_is_empty_with_hyperlink_and_image() {
1032+
use crate::image::{Image, ImagePosition};
1033+
1034+
let para = Paragraph {
1035+
style_id: None,
1036+
children: vec![
1037+
ParagraphChild::Hyperlink(Hyperlink {
1038+
id: Some("rId1".to_string()),
1039+
anchor: None,
1040+
runs: vec![], // Empty runs
1041+
}),
1042+
ParagraphChild::Image(Image {
1043+
id: 1,
1044+
rel_id: "rId2".to_string(),
1045+
target: "media/image.png".to_string(),
1046+
alt: None,
1047+
name: None,
1048+
width_emu: None,
1049+
height_emu: None,
1050+
position: ImagePosition::Inline,
1051+
}),
1052+
],
1053+
numbering: None,
1054+
};
1055+
// Image makes it non-empty
1056+
assert!(!para.is_empty());
1057+
}
1058+
1059+
/// Checks that a paragraph is non-empty as soon as one of its runs has real text:
/// a whitespace-only run followed by a run with content must not count as empty.
#[test]
1060+
fn test_paragraph_is_empty_mixed_children() {
1061+
let para = Paragraph {
1062+
style_id: None,
1063+
children: vec![
1064+
ParagraphChild::Run(Run {
1065+
text: " ".to_string(), // Whitespace only
1066+
bold: false,
1067+
italic: false,
1068+
monospace: false,
1069+
}),
1070+
ParagraphChild::Run(Run {
1071+
text: "content".to_string(), // Has content
1072+
bold: false,
1073+
italic: false,
1074+
monospace: false,
1075+
}),
1076+
],
1077+
numbering: None,
1078+
};
1079+
// One run has content
1080+
assert!(!para.is_empty());
1081+
}
1082+
1083+
/// Checks `Paragraph::runs()` on a paragraph made of a run, a hyperlink holding two
/// runs, and a trailing run: the iterator is expected to yield all four runs, with the
/// hyperlink's runs appearing in place between the direct runs.
#[test]
1084+
fn test_paragraph_runs_iterator() {
1085+
let para = Paragraph {
1086+
style_id: None,
1087+
children: vec![
1088+
ParagraphChild::Run(Run {
1089+
text: "First ".to_string(),
1090+
bold: false,
1091+
italic: false,
1092+
monospace: false,
1093+
}),
1094+
ParagraphChild::Hyperlink(Hyperlink {
1095+
id: None,
1096+
anchor: Some("target".to_string()),
1097+
runs: vec![
1098+
Run {
1099+
text: "link".to_string(),
1100+
bold: true,
1101+
italic: false,
1102+
monospace: false,
1103+
},
1104+
Run {
1105+
text: " text".to_string(),
1106+
bold: false,
1107+
italic: false,
1108+
monospace: false,
1109+
},
1110+
],
1111+
}),
1112+
ParagraphChild::Run(Run {
1113+
text: " last".to_string(),
1114+
bold: false,
1115+
italic: false,
1116+
monospace: false,
1117+
}),
1118+
],
1119+
numbering: None,
1120+
};
1121+
1122+
let runs: Vec<_> = para.runs().collect();
1123+
// 2 direct runs + 2 runs nested inside the hyperlink
assert_eq!(runs.len(), 4);
1124+
assert_eq!(runs[0].text, "First ");
1125+
assert_eq!(runs[1].text, "link");
1126+
assert_eq!(runs[2].text, " text");
1127+
assert_eq!(runs[3].text, " last");
1128+
}
1129+
1130+
/// Checks `Paragraph::images()` on a paragraph made of one run followed by two images:
/// the iterator is expected to yield only the two images, in the order they appear.
#[test]
1131+
fn test_paragraph_images_iterator() {
1132+
use crate::image::{Image, ImagePosition};
1133+
1134+
let para = Paragraph {
1135+
style_id: None,
1136+
children: vec![
1137+
ParagraphChild::Run(Run {
1138+
text: "Text ".to_string(),
1139+
bold: false,
1140+
italic: false,
1141+
monospace: false,
1142+
}),
1143+
ParagraphChild::Image(Image {
1144+
id: 1,
1145+
rel_id: "rId1".to_string(),
1146+
target: "media/image1.png".to_string(),
1147+
alt: Some("First image".to_string()),
1148+
name: None,
1149+
width_emu: None,
1150+
height_emu: None,
1151+
position: ImagePosition::Inline,
1152+
}),
1153+
ParagraphChild::Image(Image {
1154+
id: 2,
1155+
rel_id: "rId2".to_string(),
1156+
target: "media/image2.png".to_string(),
1157+
alt: Some("Second image".to_string()),
1158+
name: None,
1159+
width_emu: None,
1160+
height_emu: None,
1161+
position: ImagePosition::Inline,
1162+
}),
1163+
],
1164+
numbering: None,
1165+
};
1166+
1167+
let images: Vec<_> = para.images().collect();
1168+
// The leading text run must be skipped; only the two images remain.
assert_eq!(images.len(), 2);
1169+
assert_eq!(images[0].target, "media/image1.png");
1170+
assert_eq!(images[1].target, "media/image2.png");
1171+
}
9481172
}

0 commit comments

Comments
 (0)