@@ -38,17 +38,18 @@ confluence_id <- function (node_or_nodeset) {
38
38
xml_text
39
39
}
40
40
41
#' Format the classless “props” of each node in a nodeset, into a tibble
#'
#' @param objects_nodeset An XML nodeset (*not* a list of many
#'   nodesets)
#' @return A tibble with as many rows as there are objects in
#'   objects_nodeset. The columns of the returned tibble are the XML
#'   “name” attributes of any `property` sub-nodes that do *not*
#'   have a `class` attribute; and the values are the XML texts
#'   found within said property nodes.
49
50
props_tibble <- function (objects_nodeset ) {
50
51
objects_nodeset %> %
51
- xml_find_all(' property' , flatten = FALSE ) %> %
52
+ xml_find_all(' property[not(@class)] ' , flatten = FALSE ) %> %
52
53
tibble(row = seq_along(. ),
53
54
props = . ) %> %
54
55
rowwise() %> %
@@ -60,6 +61,32 @@ props_tibble <- function(objects_nodeset) {
60
61
select(- row )
61
62
}
62
63
64
#' Format the “props” that have a `class` for each node in a nodeset, into a tibble
#'
#' @param objects_nodeset An XML nodeset (*not* a list of many
#'   nodesets)
#' @return A tibble with as many rows as there are objects in
#'   objects_nodeset. The columns of the returned tibble are named
#'   as `foo.Bar`, where `foo` (e.g. "content") is the `name`
#'   attribute, and `Bar` is the `class` attribute, of any
#'   `property` sub-nodes that have a `class` attribute; and the
#'   values are the XML texts found within said property nodes.
#'   An object that lacks a given `foo.Bar` property gets `NA` in
#'   that column.
classful_props_tibble <- function(objects_nodeset) {
  ## With flatten = FALSE we get one nodeset of classful properties
  ## per object, and that nodeset may well be empty.
  props_by_object <- objects_nodeset %>%
    xml_find_all('property[@class]', flatten = FALSE)
  all_rows <- seq_along(props_by_object)

  tibble(row = all_rows, props = props_by_object) %>%
    rowwise() %>%
    reframe(
      row = row,
      pnames = xml_attr(props, 'name'),
      pclasses = xml_attr(props, 'class'),
      pvals = xml_text(props)) %>%
    mutate(pkeys = paste(pnames, pclasses, sep = "."), .keep = "unused") %>%
    pivot_wider(names_from = pkeys, values_from = pvals) %>%
    ## reframe() yields zero rows for an object that has no classful
    ## property, which would silently drop that object; put such rows
    ## back (all-NA) so that the result stays aligned, row for row,
    ## with objects_nodeset.
    tidyr::complete(row = all_rows) %>%
    select(-row)
}
89
+
63
90
# # Again, stuff like
64
91
# #
65
92
# # object_pages <- entities %>%
@@ -74,6 +101,7 @@ page_versions <- {
74
101
ns <- entities_xml %> % xml_find_all(' //object[@class="Page"]' )
75
102
tibble(content_id = ns %> % confluence_id ) %> %
76
103
mutate(ns %> % props_tibble ) %> %
104
+ mutate(ns %> % classful_props_tibble ) %> %
77
105
mutate(ns %> %
78
106
xml_find_all(c(' collection[@name="contentProperties"]' ,
79
107
' element[@class="ContentProperty"]' ,
@@ -88,21 +116,22 @@ content_properties <- {
88
116
ns <- entities_xml %> % xml_find_all(' //object[@class="ContentProperty"]' )
89
117
tibble(property_id = ns %> % confluence_id ) %> %
90
118
mutate(ns %> % props_tibble ) %> %
91
- rename( content_id = content )
119
+ mutate( ns % > % classful_props_tibble )
92
120
}
93
121
94
122
## page_versions$content_property_ids (as a “multivalued foreign key”)
## ought to contain the same information as
## content_properties[c("content.Page", "property_id")], once the
## properties that have no content.Page are set aside (some
## properties are for objects other than pages).
relation1 <-
  page_versions %>%
  transmute(content_id, property_id = content_property_ids) %>%
  unnest_longer(property_id)
relation2 <-
  content_properties %>%
  filter(!is.na(content.Page)) %>%
  transmute(content_id = content.Page, property_id)
by <- join_by(content_id, property_id)
## Each direction of containment is its own condition, so that a
## failure reports which of the two does not hold.
stopifnot(
  nrow(anti_join(relation1, relation2, by = by)) == 0,
  nrow(anti_join(relation2, relation1, by = by)) == 0)
0 commit comments