Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
bda014a
comments for changes
RWParsons Nov 21, 2025
36554cb
add refactored handle & fetch fncs
cynthiahqy Nov 21, 2025
14ededa
fix typo in fetch_response call
cynthiahqy Nov 21, 2025
ef3bd37
remove unused libraries_count line
cynthiahqy Nov 21, 2025
8e6a167
match language names draft
smwindecker Nov 21, 2025
02eb395
draft get_auth_handle function
smwindecker Nov 21, 2025
3dcb441
Merge remote-tracking branch 'origin/refactor-recent-stats' into refa…
smwindecker Nov 21, 2025
1492b55
add documentation to get_auth_handle and remove unneeded version of f…
smwindecker Nov 21, 2025
303fe65
add fncs to fetch all pages from endpoint
cynthiahqy Nov 21, 2025
48dd8d9
fetch pages as list in Recent Changes
cynthiahqy Nov 21, 2025
9aecf5d
move function to correct R folder.
smwindecker Nov 21, 2025
a0c8c89
implement the match language functions
smwindecker Nov 21, 2025
c6b930d
Merge remote-tracking branch 'origin/refactor-recent-stats' into refa…
smwindecker Nov 21, 2025
7dfd8d0
Merge remote-tracking branch 'origin/refactor-recent-stats' into refa…
cynthiahqy Nov 21, 2025
80089f0
process page response as function
RWParsons Nov 21, 2025
cf40b34
cut down on for loop to process changes pages
RWParsons Nov 21, 2025
f566ded
revise example in documentation to use file object name
smwindecker Nov 21, 2025
bfe5923
refactor plural to singular in process_page_response()
RWParsons Nov 21, 2025
cc92225
create function to replace the loop over pages in edit_url
smwindecker Nov 21, 2025
722bcf4
source directory before using functions
smwindecker Nov 21, 2025
10ac50f
commit merge
smwindecker Nov 21, 2025
5d17d94
change name of file to match name of function
smwindecker Nov 21, 2025
c63ee65
styling mark_page
RWParsons Nov 21, 2025
b496f34
wrap process_page_response in collect_page_changes that returns data.…
RWParsons Nov 21, 2025
aedf99f
remove stringr dependency
RWParsons Nov 21, 2025
4f8f790
remove redundant code and run styler
RWParsons Nov 21, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions weblate/R/collect_page_changes.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#' Collect the changes on one page response into a data frame
#'
#' @param changes_page a parsed page of results from the Weblate changes
#'    endpoint, as returned by fetch_response_content()
#' @param language_file reference file containing the link between language
#'    code and language full name
#' @param slugs vector of component slugs, parallel to `name_of_libraries`
#' @param name_of_libraries vector of library display names, parallel to
#'    `slugs`
#'
#' @returns data.frame with one row per change: user, language, library,
#'    units, date (integer) and time
#' @export
collect_page_changes <- function(
  changes_page,
  language_file = NULL,
  slugs = NULL,
  name_of_libraries = NULL
) {
  stopifnot(
    "language_file must be provided" = !is.null(language_file),
    "slugs must be provided" = !is.null(slugs),
    "name_of_libraries must be provided" = !is.null(name_of_libraries)
  )
  page_data <- process_page_response(changes_page)

  languages <- match_language_names(
    page_data$extracted_lang,
    language_file
  )

  # Vectorised slug -> library-name lookup. An unmatched slug yields NA,
  # whereas the previous element-wise loop errored on it (zero-length
  # replacement) and also printed a leftover debug counter.
  extracted_lib <- name_of_libraries[match(page_data$extracted_slug, slugs)]

  data.frame(
    user = page_data$extracted_users,
    language = languages,
    library = extracted_lib,
    units = page_data$extracted_units,
    date = as.integer(page_data$date),
    time = page_data$time
  )
}
28 changes: 28 additions & 0 deletions weblate/R/fetch_responses.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#' Fetch an endpoint and parse its JSON body
#'
#' @param endpoint URL to query
#' @param handle authenticated curl handle (see get_auth_handle())
#'
#' @returns the parsed JSON content as an R object
fetch_response_content <- function(endpoint, handle) {
  # status goes to stderr so it does not pollute captured output
  message("Querying endpoint ", endpoint)

  # BUG FIX: the original passed the global `h` here instead of the
  # `handle` argument, silently ignoring whatever the caller supplied
  response <- curl_fetch_memory(endpoint, handle = handle)
  response_content <- rawToChar(response$content)
  fromJSON(response_content)
}

#' Number of pages needed to hold `count` items
#'
#' @param count total number of items reported by the endpoint
#' @param page_size items per page (Weblate default is 50)
#'
#' @returns integer-valued number of pages (0 when count is 0)
calculate_n_pages <- function(count, page_size = 50) {
  # ceiling() already returns count / page_size exactly when count is a
  # multiple of page_size, so the original %% branch was redundant
  ceiling(count / page_size)
}

#' Fetch every page of a paginated endpoint
#'
#' @param n_pages number of pages to fetch (see calculate_n_pages())
#' @param endpoint base URL; "&page=i" is appended for each page
#' @param handle authenticated curl handle passed through to the fetcher
#'
#' @returns list of length `n_pages` with one parsed page per element
fetch_pages_content <- function(n_pages, endpoint, handle) {
  pages <- vector("list", n_pages)
  # seq_len() instead of 1:n_pages: when n_pages is 0 the original
  # iterated over c(1, 0) and issued two bogus requests
  for (i in seq_len(n_pages)) {
    url <- paste0(endpoint, "&page=", i)
    pages[[i]] <- fetch_response_content(url, handle)
  }
  pages
}
19 changes: 19 additions & 0 deletions weblate/R/get_auth_handle.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#' Get authorisation handle for data fetching
#'
#' @param API_TOKEN access credential (a Weblate API token)
#'
#' @returns a curl handle configured with the authorisation header,
#'    suitable for passing to fetch_response_content()
#' @export
#'
#' @examples
#' \dontrun{
#' API_TOKEN <- Sys.getenv("WEBLATE_TOKEN")
#' h <- get_auth_handle(API_TOKEN)
#' }
get_auth_handle <- function(API_TOKEN) {
  h <- new_handle()
  # SECURITY NOTE(review): this disables TLS certificate verification,
  # which allows man-in-the-middle interception of the API token.
  # Kept as-is to preserve behaviour, but it should be removed once the
  # server certificate chain is fixed.
  handle_setopt(h, ssl_verifyhost = 0L, ssl_verifypeer = 0L)
  handle_setopt(h, customrequest = "GET")
  handle_setopt(h, httpheader = c(paste0("Authorization: Token ", API_TOKEN)))
  return(h)
}
53 changes: 53 additions & 0 deletions weblate/R/mark_page.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#' Extract information from newly edited records on a given page
#'
#' @param page_no page of edited changes to be fetched
#' @param edit_url url to access edited changes page
#' @param language_file reference file containing the link between language
#'    code and language full name
#' @param handle curl handle used for the request; defaults to the global
#'    `h` that the original implementation read implicitly
#' @param component_slugs vector of component slugs; defaults to the
#'    global `slugs` that the original implementation read implicitly
#' @param library_names vector of library names parallel to
#'    `component_slugs`; defaults to the global `name_of_libraries`
#'
#' @returns dataframe containing language, library, string, id, and url for
#'    the edited records on this given page
#' @export
#'
#' @examples
#' \dontrun{
#' mark_page(1,
#'   "https://translate.rx.studio/api/units/?q=project:r-project%20AND%20state:needs-editing",
#'   Language_Statistics)
#' }
mark_page <- function(page_no,
                      edit_url,
                      language_file = NULL,
                      handle = h,
                      component_slugs = slugs,
                      library_names = name_of_libraries) {
  # input checking
  stopifnot(
    "page should be greater than 0" = page_no > 0,
    "edit_url should not be empty" = nzchar(edit_url),
    "language_file must be provided" = !is.null(language_file)
  )

  mark_url <- paste0(edit_url, "&page=", page_no)
  mark_changes <- fetch_response_content(endpoint = mark_url, handle = handle)
  # each row is a unit: https://docs.weblate.org/en/latest/api.html#units

  mark_lang <- match_language_names(
    mark_changes$results$language_code,
    language_file
  )

  # the component slug is the second-to-last path segment of the
  # translation URL; map it to a human-readable library name
  mark_lib_id <- match(
    basename(dirname(mark_changes$results$translation)),
    component_slugs
  )
  mark_lib <- library_names[mark_lib_id]

  # where there are multiple messages due to plurals, use the first
  mark_string <- vapply(mark_changes$results$source, "[", character(1), 1)
  mark_units <- mark_changes$results$id
  mark_web_url <- mark_changes$results$web_url

  data.frame(
    language = mark_lang,
    library = mark_lib,
    string = mark_string,
    id = mark_units,
    url = mark_web_url
  )
}
20 changes: 20 additions & 0 deletions weblate/R/match_language_names.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#' Match language code page changes to reference list
#'
#' @param extracted_language_codes language code extracted from the
#'    page changes objects
#' @param language_file reference file containing the link between language
#'    code and language full name
#'
#' @returns vector of language names
#' @export
#'
#' @examples
#' extracted_lang <- c("ar", "bn", "ca")
#' languages <- match_language_names(extracted_lang, Language_Statistics)
match_language_names <- function(extracted_language_codes, language_file) {
  # find each code's row in the reference table, then read off its full name;
  # codes absent from the table come back as NA
  row_idx <- match(extracted_language_codes, language_file$Code)
  language_file$Name[row_idx]
}

67 changes: 67 additions & 0 deletions weblate/R/process_page_response.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#' Extract and clean data from a page of the changes endpoint
#'
#' @param page a parsed page of results; `page$results` holds one row per
#'    change with `user`, `translation`, `component`, `unit` URL columns
#'    and a `timestamp` column in ISO-8601 UTC ("...Z") format
#'
#' @returns A list with elements `extracted_users`, `extracted_lang`,
#'    `extracted_slug`, `extracted_units` (character vectors), `date`
#'    (Date) and `time` (character "HH:MM:SS").
#' @export
process_page_response <- function(page) {
  # each field of interest is a URL whose final path segment is the id;
  # "/([^/]+)/$" captures that segment, then the slashes are stripped
  extracted_users <- extract_str(page$results$user, "/([^/]+)/$")
  extracted_users <- gsub("/", "", extracted_users)
  extracted_lang <- extract_str(page$results$translation, "/([^/]+)/$")
  extracted_lang <- gsub("/", "", extracted_lang)
  extracted_slug <- extract_str(page$results$component, "/([^/]+)/$")
  extracted_slug <- gsub("/", "", extracted_slug)
  extracted_units <- extract_str(page$results$unit, "/([^/]+)/$")
  extracted_units <- gsub("/", "", extracted_units)
  # (the original also extracted a `component` string here that was never
  # used anywhere; that dead code has been removed)

  # timestamps end in "Z", i.e. UTC; parse them as UTC so the derived
  # date/time do not shift with the local timezone of the machine.
  # The original round-trip through strptime() was redundant: datetime
  # is already a parsed POSIXct.
  datetime <- as.POSIXct(
    page$results$timestamp,
    format = "%Y-%m-%dT%H:%M:%OSZ",
    tz = "UTC"
  )
  date <- as.Date(datetime)
  time <- format(datetime, format = "%H:%M:%S")

  list(
    extracted_users = extracted_users,
    extracted_lang = extracted_lang,
    extracted_slug = extracted_slug,
    extracted_units = extracted_units,
    date = date,
    time = time
  )
}


#' Basically the same as stringr::str_extract()
#'
#' Extracts the first match of `pattern` from each element of `x`,
#' returning `NA` for elements with no match.
#'
#' @param x A character vector.
#' @param pattern A regular expression.
#'
#' @returns A character vector the same length as `x`.
#'
#' @examples
#' extract_str(c("test1", "apple"), "app")
extract_str <- function(x, pattern) {
  # vapply (rather than sapply) guarantees a character vector even for
  # zero-length input, where sapply would silently return list()
  vapply(
    x,
    function(.x) extract_str_or_na(.x, pattern),
    character(1),
    USE.NAMES = FALSE
  )
}


#' Extract the first match of a pattern, or NA if there is no match.
#'
#' @param i A character scalar.
#' @param pattern A regular expression.
#'
#' @returns A length-one character.
#'
#' @examples
#' extract_str_or_na("apple", "app")
extract_str_or_na <- function(i, pattern) {
  m <- unlist(regmatches(i, gregexpr(pattern, i)))
  if (length(m) == 0) {
    return(NA_character_)
  }
  # keep only the first match: the original returned ALL matches, which
  # produced a length > 1 result (breaking extract_str's vectorisation)
  # and contradicted the documented scalar return
  m[1]
}
Loading