@@ -908,11 +908,10 @@ mod tests {
908908 </w:document>"# ;
909909
910910 let doc = Document :: parse ( xml) . unwrap ( ) ;
911- if let Block :: Paragraph ( p) = & doc. blocks [ 0 ] {
912- assert_eq ! ( p. style_id, Some ( "Heading1" . to_string( ) ) ) ;
913- } else {
911+ let Block :: Paragraph ( p) = & doc. blocks [ 0 ] else {
914912 panic ! ( "Expected paragraph" ) ;
915- }
913+ } ;
914+ assert_eq ! ( p. style_id, Some ( "Heading1" . to_string( ) ) ) ;
916915 }
917916
918917 #[ test]
@@ -931,19 +930,340 @@ mod tests {
931930 let doc = Document :: parse ( xml) . unwrap ( ) ;
932931 assert_eq ! ( doc. blocks. len( ) , 1 ) ;
933932
934- if let Block :: Paragraph ( p) = & doc. blocks [ 0 ] {
935- assert_eq ! ( p. children. len( ) , 1 ) ;
936- if let ParagraphChild :: Hyperlink ( h) = & p. children [ 0 ] {
937- assert_eq ! ( h. anchor, Some ( "_Toc123" . to_string( ) ) ) ;
938- assert_eq ! ( h. id, None ) ;
939- assert_eq ! ( h. runs. len( ) , 1 ) ;
940- assert_eq ! ( h. runs[ 0 ] . text, "Click me" ) ;
941- } else {
942- panic ! ( "Expected Hyperlink, got {:?}" , p. children[ 0 ] ) ;
943- }
944- } else {
933+ let Block :: Paragraph ( p) = & doc. blocks [ 0 ] else {
945934 panic ! ( "Expected paragraph" ) ;
946- }
935+ } ;
936+ assert_eq ! ( p. children. len( ) , 1 ) ;
937+ let ParagraphChild :: Hyperlink ( h) = & p. children [ 0 ] else {
938+ panic ! ( "Expected Hyperlink" ) ;
939+ } ;
940+ assert_eq ! ( h. anchor, Some ( "_Toc123" . to_string( ) ) ) ;
941+ assert_eq ! ( h. id, None ) ;
942+ assert_eq ! ( h. runs. len( ) , 1 ) ;
943+ assert_eq ! ( h. runs[ 0 ] . text, "Click me" ) ;
944+ }
945+
946+ // ==================== Sprint 8: Document::parse Integration Tests ====================
947+
/// Parses a styled 2x2 table and checks its style id, its dimensions and the text of
/// the first and last cells.
#[test]
fn test_parse_table_simple() {
    // Note: tblStyle is written with an explicit closing tag because the current parser
    // requires the non-self-closing form.
    let xml = br#"<?xml version="1.0" encoding="UTF-8"?>
<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
<w:body>
<w:tbl>
<w:tblPr>
<w:tblStyle w:val="TableGrid"></w:tblStyle>
</w:tblPr>
<w:tr>
<w:tc>
<w:p><w:r><w:t>Cell 1</w:t></w:r></w:p>
</w:tc>
<w:tc>
<w:p><w:r><w:t>Cell 2</w:t></w:r></w:p>
</w:tc>
</w:tr>
<w:tr>
<w:tc>
<w:p><w:r><w:t>Cell 3</w:t></w:r></w:p>
</w:tc>
<w:tc>
<w:p><w:r><w:t>Cell 4</w:t></w:r></w:p>
</w:tc>
</w:tr>
</w:tbl>
</w:body>
</w:document>"#;

    let parsed = Document::parse(xml).unwrap();
    assert_eq!(parsed.blocks.len(), 1);

    let Block::Table(table) = &parsed.blocks[0] else {
        panic!("Expected Table");
    };
    assert_eq!(table.style_id.as_deref(), Some("TableGrid"));
    assert_eq!(table.rows.len(), 2);

    // Top-left cell of the first row.
    let first_row = &table.rows[0];
    assert_eq!(first_row.cells.len(), 2);
    assert_eq!(first_row.cells[0].paragraphs[0].plain_text(), "Cell 1");

    // Bottom-right cell of the second row.
    let last_row = &table.rows[1];
    assert_eq!(last_row.cells[1].paragraphs[0].plain_text(), "Cell 4");
}
990+
/// A table that has no `tblStyle` element parses with no style id and a single row.
#[test]
fn test_parse_table_without_style() {
    let xml = br#"<?xml version="1.0" encoding="UTF-8"?>
<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
<w:body>
<w:tbl>
<w:tr>
<w:tc><w:p><w:r><w:t>Data</w:t></w:r></w:p></w:tc>
</w:tr>
</w:tbl>
</w:body>
</w:document>"#;

    let parsed = Document::parse(xml).unwrap();
    let Block::Table(table) = &parsed.blocks[0] else {
        panic!("Expected Table");
    };

    // No tblPr/tblStyle in the input, so no style id is expected.
    assert_eq!(table.style_id, None);
    assert_eq!(table.rows.len(), 1);
}
1012+
/// A two-row, single-column table keeps its rows in document order.
#[test]
fn test_parse_table_multiple_rows() {
    let xml = br#"<?xml version="1.0" encoding="UTF-8"?>
<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
<w:body>
<w:tbl>
<w:tr>
<w:tc><w:p><w:r><w:t>Header</w:t></w:r></w:p></w:tc>
</w:tr>
<w:tr>
<w:tc><w:p><w:r><w:t>Data</w:t></w:r></w:p></w:tc>
</w:tr>
</w:tbl>
</w:body>
</w:document>"#;

    let parsed = Document::parse(xml).unwrap();
    let Block::Table(table) = &parsed.blocks[0] else {
        panic!("Expected Table");
    };
    assert_eq!(table.rows.len(), 2);

    // Note: is_header detection is not yet implemented, so only the cell text is checked.
    let header_cell = &table.rows[0].cells[0];
    assert_eq!(header_cell.paragraphs[0].plain_text(), "Header");
    let data_cell = &table.rows[1].cells[0];
    assert_eq!(data_cell.paragraphs[0].plain_text(), "Data");
}
1038+
/// A paragraph whose `pPr` carries a `sectPr` between two text paragraphs parses without
/// error and yields at least two blocks.
#[test]
fn test_parse_section_break() {
    let xml = br#"<?xml version="1.0" encoding="UTF-8"?>
<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
<w:body>
<w:p><w:r><w:t>Before break</w:t></w:r></w:p>
<w:p>
<w:pPr>
<w:sectPr>
<w:type w:val="nextPage"/>
</w:sectPr>
</w:pPr>
</w:p>
<w:p><w:r><w:t>After break</w:t></w:r></w:p>
</w:body>
</w:document>"#;

    let parsed = Document::parse(xml).unwrap();

    // The body holds three <w:p> elements (text, section-break carrier, text); only a
    // lower bound of two blocks is asserted here.
    let block_count = parsed.blocks.len();
    assert!(block_count >= 2);
}
1060+
/// A paragraph with `numPr` exposes its numbering reference: `numId` 1 at level (`ilvl`) 0.
#[test]
fn test_parse_numbering_reference() {
    let xml = br#"<?xml version="1.0" encoding="UTF-8"?>
<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
<w:body>
<w:p>
<w:pPr>
<w:numPr>
<w:ilvl w:val="0"/>
<w:numId w:val="1"/>
</w:numPr>
</w:pPr>
<w:r><w:t>List item</w:t></w:r>
</w:p>
</w:body>
</w:document>"#;

    let doc = Document::parse(xml).unwrap();
    let Block::Paragraph(p) = &doc.blocks[0] else {
        panic!("Expected paragraph");
    };

    // Bind the numbering in a single guard instead of `is_some()` followed by `unwrap()`,
    // matching the `let ... else` style used for the block and child checks in this module.
    let Some(num) = p.numbering.as_ref() else {
        panic!("Expected numbering");
    };
    assert_eq!(num.num_id, 1);
    assert_eq!(num.ilvl, 0);
}
1087+
/// Paragraph / table / paragraph in the body come back as three blocks in that order.
#[test]
fn test_parse_multiple_block_types() {
    let xml = br#"<?xml version="1.0" encoding="UTF-8"?>
<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
<w:body>
<w:p><w:r><w:t>Intro</w:t></w:r></w:p>
<w:tbl>
<w:tr>
<w:tc><w:p><w:r><w:t>Data</w:t></w:r></w:p></w:tc>
</w:tr>
</w:tbl>
<w:p><w:r><w:t>Conclusion</w:t></w:r></w:p>
</w:body>
</w:document>"#;

    let parsed = Document::parse(xml).unwrap();
    let blocks = &parsed.blocks;
    assert_eq!(blocks.len(), 3);

    // Only the variant of each block is checked, not its contents.
    assert!(matches!(blocks[0], Block::Paragraph(_)));
    assert!(matches!(blocks[1], Block::Table(_)));
    assert!(matches!(blocks[2], Block::Paragraph(_)));
}
1109+
/// A document whose body has no child elements parses to an empty block list.
#[test]
fn test_parse_empty_document() {
    let xml = br#"<?xml version="1.0" encoding="UTF-8"?>
<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
<w:body>
</w:body>
</w:document>"#;

    let parsed = Document::parse(xml).unwrap();
    let blocks = &parsed.blocks;
    assert!(blocks.is_empty());
}
1121+
/// A run whose `rPr` contains `<w:b/>` and `<w:i/>` is reported as bold and italic, with
/// its text intact.
#[test]
fn test_parse_run_with_formatting() {
    let xml = br#"<?xml version="1.0" encoding="UTF-8"?>
<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
<w:body>
<w:p>
<w:r>
<w:rPr>
<w:b/>
<w:i/>
</w:rPr>
<w:t>Bold and italic</w:t>
</w:r>
</w:p>
</w:body>
</w:document>"#;

    let parsed = Document::parse(xml).unwrap();
    let Block::Paragraph(para) = &parsed.blocks[0] else {
        panic!("Expected paragraph");
    };
    let ParagraphChild::Run(run) = &para.children[0] else {
        panic!("Expected run");
    };

    assert!(run.bold);
    assert!(run.italic);
    assert_eq!(run.text, "Bold and italic");
}
1150+
/// A hyperlink that carries only `r:id` exposes that relationship id and has no anchor.
#[test]
fn test_parse_hyperlink_with_external_id() {
    let xml = br#"<?xml version="1.0" encoding="UTF-8"?>
<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships">
<w:body>
<w:p>
<w:hyperlink r:id="rId5">
<w:r><w:t>External link</w:t></w:r>
</w:hyperlink>
</w:p>
</w:body>
</w:document>"#;

    let parsed = Document::parse(xml).unwrap();
    let Block::Paragraph(para) = &parsed.blocks[0] else {
        panic!("Expected paragraph");
    };
    let ParagraphChild::Hyperlink(link) = &para.children[0] else {
        panic!("Expected hyperlink");
    };

    assert_eq!(link.id.as_deref(), Some("rId5"));
    // No w:anchor attribute is present in the input.
    assert_eq!(link.anchor, None);
}
1175+
/// A paragraph containing `bookmarkStart` / run / `bookmarkEnd` yields at least one
/// `ParagraphChild::Bookmark` child.
#[test]
fn test_parse_bookmark() {
    let xml = br#"<?xml version="1.0" encoding="UTF-8"?>
<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
<w:body>
<w:p>
<w:bookmarkStart w:id="0" w:name="_Toc123456"/>
<w:r><w:t>Heading</w:t></w:r>
<w:bookmarkEnd w:id="0"/>
</w:p>
</w:body>
</w:document>"#;

    let parsed = Document::parse(xml).unwrap();
    let Block::Paragraph(para) = &parsed.blocks[0] else {
        panic!("Expected paragraph");
    };

    // Only the presence of a bookmark child is asserted; the run is not checked here.
    assert!(para
        .children
        .iter()
        .any(|child| matches!(child, ParagraphChild::Bookmark(_))));
}
1197+
/// The single cell of a 1x1 table holds a paragraph with the expected text.
#[test]
fn test_parse_table_cell_with_paragraph() {
    let xml = br#"<?xml version="1.0" encoding="UTF-8"?>
<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
<w:body>
<w:tbl>
<w:tr>
<w:tc>
<w:p><w:r><w:t>Cell content</w:t></w:r></w:p>
</w:tc>
</w:tr>
</w:tbl>
</w:body>
</w:document>"#;

    let parsed = Document::parse(xml).unwrap();
    let Block::Table(table) = &parsed.blocks[0] else {
        panic!("Expected Table");
    };

    let cells = &table.rows[0].cells;
    assert_eq!(cells.len(), 1);

    let cell_paragraphs = &cells[0].paragraphs;
    assert!(!cell_paragraphs.is_empty());
    assert_eq!(cell_paragraphs[0].plain_text(), "Cell content");
}
1221+
/// `Document::paragraphs()` yields top-level paragraphs followed by the paragraph that
/// lives inside the table cell, in document order.
#[test]
fn test_document_paragraphs_iterator_flattens_tables() {
    let xml = br#"<?xml version="1.0" encoding="UTF-8"?>
<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
<w:body>
<w:p><w:r><w:t>First</w:t></w:r></w:p>
<w:p><w:r><w:t>Second</w:t></w:r></w:p>
<w:tbl>
<w:tr><w:tc><w:p><w:r><w:t>Table text</w:t></w:r></w:p></w:tc></w:tr>
</w:tbl>
</w:body>
</w:document>"#;

    let parsed = Document::parse(xml).unwrap();
    let flattened: Vec<_> = parsed.paragraphs().collect();

    // paragraphs() flattens tables, so the table's paragraph is included in the count.
    let expected = ["First", "Second", "Table text"];
    assert_eq!(flattened.len(), 3);
    for (para, want) in flattened.iter().zip(expected) {
        assert_eq!(para.plain_text(), want);
    }
}
1243+
/// Iterating `doc.blocks` directly sees only top-level blocks, so the paragraph nested
/// in the table cell is not among the collected paragraphs.
#[test]
fn test_document_blocks_access() {
    let xml = br#"<?xml version="1.0" encoding="UTF-8"?>
<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
<w:body>
<w:p><w:r><w:t>Para</w:t></w:r></w:p>
<w:tbl>
<w:tr><w:tc><w:p><w:r><w:t>Table</w:t></w:r></w:p></w:tc></w:tr>
</w:tbl>
</w:body>
</w:document>"#;

    let parsed = Document::parse(xml).unwrap();

    // For block-level iteration, use blocks directly.
    assert_eq!(parsed.blocks.len(), 2);

    let top_level_paras: Vec<_> = parsed
        .blocks
        .iter()
        .filter_map(|block| {
            if let Block::Paragraph(para) = block {
                Some(para)
            } else {
                None
            }
        })
        .collect();
    assert_eq!(top_level_paras.len(), 1);
    assert_eq!(top_level_paras[0].plain_text(), "Para");
}
9481268
9491269 // ==================== Sprint 7: Paragraph::is_empty Tests ====================
0 commit comments