@@ -725,7 +725,8 @@ summarise_as_paragraph <- function(data, preserve = FALSE) {
725725# ' - `footnote_text`: Footnote text content (character).
726726# ' - `link`: Hyperlink URL (character).
727727# ' - `link_to_bookmark`: Internal bookmark anchor name for hyperlinks (character).
728- # ' - `bookmark_start`: Name of the bookmark starting at this run (character).
728+ # ' - `bookmark_start`: Names of the bookmarks that start in this paragraph;
729+ # ' multiple names are concatenated with '|' (character).
729730# ' - `character_stylename`: Name of the character/run style (character).
730731# ' - `sz`: Font size in half-points (integer).
731732# ' - `sz_cs`: Complex script font size in half-points (integer).
@@ -821,17 +822,19 @@ docx_summary <- function(x, preserve = FALSE, remove_fields = FALSE, detailed =
821822 # # bookmark_nodes
822823 bookmark_nodes <- xml_find_all(
823824 x $ doc_obj $ get(),
824- " //w:p/w:bookmarkStart[following-sibling::*[1][self::w:r]]"
825- )
826- bookmark_nodes_siblings <- xml_find_all(
827- x $ doc_obj $ get(),
828- " //w:p/w:bookmarkStart[following-sibling::*[1][self::w:r]]/following-sibling::*[1]"
825+ " w:body/w:p/w:bookmarkStart"
829826 )
830827
831828 data_bookmark <- data.frame (
832829 bookmark_start = xml_attr(bookmark_nodes , " name" ),
833- run_index = xml_attr(bookmark_nodes_siblings , " run_index" )
830+ doc_index = xml_attr(xml_child(bookmark_nodes , " parent::w:p" ), " doc_index" )
831+ )
832+ data_bookmark <- summarise(
833+ .data = data_bookmark ,
834+ bookmark_start = paste0(.data $ bookmark_start , collapse = " |" ),
835+ .by = all_of(" doc_index" )
834836 )
837+ data_bookmark $ doc_index <- as.integer(data_bookmark $ doc_index )
835838
836839 # # p_in_cell_nodes
837840 p_in_cell_nodes <- xml_find_all(
@@ -842,7 +845,6 @@ docx_summary <- function(x, preserve = FALSE, remove_fields = FALSE, detailed =
842845 # info for runs: infotbl_runs -----
843846 infotbl_runs_contents <- docx_runs_content_information(run_content_nodes )
844847 infotbl_runs <- docx_runs_information(run_nodes )
845- infotbl_runs <- left_join(infotbl_runs , data_bookmark , by = " run_index" )
846848
847849 # info for tables: infotbl_tables -----
848850 tmp_infotbl_tables <- docx_tables_information(tbl_nodes )
@@ -859,6 +861,7 @@ docx_summary <- function(x, preserve = FALSE, remove_fields = FALSE, detailed =
859861
860862 # info for paragraphs: infotbl_paragraphs -----
861863 infotbl_paragraphs <- docx_p_information(p_nodes )
864+ infotbl_paragraphs <- left_join(infotbl_paragraphs , data_bookmark , by = " doc_index" )
862865
863866 # final joins -------
864867 data <- infotbl_join(infotbl_runs_contents , infotbl_runs , infotbl_paragraphs , infotbl_tables )
0 commit comments