Skip to content

Commit f91280c

Browse files
author
Dominique Quatravaux
committed
[datacleanup] Versions ought to be unique per version group
1 parent 5ada4d1 commit f91280c

File tree

1 file changed

+22
-1
lines changed

1 file changed

+22
-1
lines changed

confluence.R

Lines changed: 22 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -144,7 +144,28 @@ page_versions <- {
144144
anti_join(relation1, relation2, by = by) %>% nrow == 0 &&
145145
anti_join(relation2, relation1, by = by) %>% nrow == 0
146146
})
147-
page_versions %>% select(-content_property_ids)
147+
page_versions <- page_versions %>% select(-content_property_ids)
148+
## Pages ought to be grouped into version histories by their
149+
## `originalVersion.Page`, which needs a little data-cleaning:
150+
stopifnot(page_versions %>%
151+
filter(content_id == originalVersion.Page) %>%
152+
nrow == 0)
153+
page_versions <-
154+
page_versions %>%
155+
mutate(.keep = "unused", # Except as shown below
156+
is.original = is.na(originalVersion.Page),
157+
originalVersion = coalesce(originalVersion.Page, content_id),
158+
## And we want to .keep this one too:
159+
content_id = content_id)
160+
## `version`s should be unique per version group (except perhaps
161+
## for drafts):
162+
stopifnot(page_versions %>%
163+
filter(contentStatus != "draft") %>%
164+
group_by(originalVersion) %>%
165+
summarize(n = n(), n_distinct = n_distinct(version)) %>%
166+
filter(n != n_distinct) %>%
167+
nrow == 0)
168+
page_versions
148169
}
149170

150171
bodies <- {

0 commit comments

Comments
 (0)