epiforecasts
diff --git a/‎.github/workflows/lint-changed-files.yaml‎
Lines changed: 1 addition & 0 deletions b/‎.github/workflows/lint-changed-files.yaml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.lintr‎
Lines changed: 2 additions & 7 deletions b/‎.lintr‎
Lines changed: 2 additions & 7 deletions
diff --git a/‎NEWS.md‎
Lines changed: 5 additions & 0 deletions b/‎NEWS.md‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎R/as_contact_survey.R‎
Lines changed: 13 additions & 6 deletions b/‎R/as_contact_survey.R‎
Lines changed: 13 additions & 6 deletions
diff --git a/‎R/assign-age-groups.R‎
Lines changed: 5 additions & 5 deletions b/‎R/assign-age-groups.R‎
Lines changed: 5 additions & 5 deletions
diff --git a/‎R/check.R‎
Lines changed: 72 additions & 45 deletions b/‎R/check.R‎
Lines changed: 72 additions & 45 deletions
diff --git a/‎R/checkers.R‎
Lines changed: 1 addition & 0 deletions b/‎R/checkers.R‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎R/clean.R‎
Lines changed: 3 additions & 2 deletions b/‎R/clean.R‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎R/contact-matrix-utils.R‎
Lines changed: 5 additions & 4 deletions b/‎R/contact-matrix-utils.R‎
Lines changed: 5 additions & 4 deletions
@@ -21,6 +21,7 @@ jobs:
       - uses: r-lib/actions/setup-r-dependencies@v2
         with:
           extra-packages: |
+            any::cyclocomp
             any::gh
             any::lintr
             any::purrr
 
@@ -5,12 +5,7 @@ linters: linters_with_tags(
     implicit_integer_linter = NULL,
     todo_comment_linter = NULL,
     function_argument_linter = NULL,
-    backport_linter("3.5.0"),
-    # Extra exclusions for socialmixr.
-    # These exclusions should ideally be removed at some point.
-    line_length_linter = NULL,
-    cyclocomp_linter = NULL,
-    object_usage_linter = NULL,
-    indentation_linter = NULL
+    indentation_linter = NULL, # air handles indentation via style.yaml
+    backport_linter("3.5.0")
   )
 
@@ -1,5 +1,10 @@
 # socialmixr (development version)
 
+* Enabled `cyclocomp_linter`, `line_length_linter`, and `object_usage_linter`.
+  Disabled `indentation_linter` (air handles indentation). Reduced cyclomatic
+  complexity of `check.contact_survey()`, `[.contact_survey()`, and
+  `find_unique_key()` by extracting helper functions.
+
 * New `[.contact_survey` method allows filtering survey objects with
   expressions, e.g. `polymod[country == "United Kingdom"]` (#161).
 
 
@@ -1,6 +1,7 @@
 #' @title Check contact survey data
 #'
-#' @description Checks that a survey fulfills all the requirements to work with the 'contact_matrix' function
+#' @description Checks that a survey fulfills all the requirements
+#'   to work with the 'contact_matrix' function
 #'
 #' @param x list containing
 #'  - an element named 'participants', a data frame containing participant
@@ -9,12 +10,18 @@
 #'  - (optionally) an element named 'reference, a list containing information
 #'   information needed to reference the survey, in particular it can contain$a
 #'   "title", "bibtype", "author", "doi", "publisher", "note", "year"
-#' @param id_column the column in both the `participants` and `contacts` data frames that links contacts to participants
-#' @param country_column the column in the `participants` data frame containing the country in which the participant was queried; if NULL (default), will use "country" column if present
-#' @param year_column the column in the `participants` data frame containing the year in which the participant was queried; if NULL (default), will use "year" column if present
+#' @param id_column the column in both the `participants` and
+#'   `contacts` data frames that links contacts to participants
+#' @param country_column the column in the `participants` data frame
+#'   containing the country in which the participant was queried; if
+#'   NULL (default), will use "country" column if present
+#' @param year_column the column in the `participants` data frame
+#'   containing the year in which the participant was queried; if
+#'   NULL (default), will use "year" column if present
 #' @param ... additional arguments (currently ignored)
-#' @param id.column,country.column,year.column `r lifecycle::badge("deprecated")`
-#'   Use the underscore versions (e.g., `id_column`) instead.
+#' @param id.column,country.column,year.column
+#'   `r lifecycle::badge("deprecated")` Use the underscore versions
+#'   (e.g., `id_column`) instead.
 #' @importFrom checkmate assert_list assert_names assert_data_frame
 #'   assert_character
 #' @importFrom purrr walk
 
@@ -70,7 +70,7 @@ assign_age_groups <- function(
   survey$participants <- add_part_age(survey$participants)
   survey$contacts <- add_contact_age(survey$contacts)
 
-  ## Impute participant ages from ranges ----------------------------------------
+  ## Impute participant ages from ranges ------------------------------------
   survey$participants <- impute_participant_ages(
     participants = survey$participants,
     estimate = estimated_participant_age
@@ -87,7 +87,7 @@ assign_age_groups <- function(
     )
   )
 
-  ## Impute contact ages from ranges --------------------------------------------
+  ## Impute contact ages from ranges ------------------------------------------
   survey$contacts <- impute_contact_ages(
     contacts = survey$contacts,
     estimate = estimated_contact_age
@@ -96,14 +96,14 @@ assign_age_groups <- function(
   # define age limits if not given
   age_limits <- age_limits %||% get_age_limits(survey)
 
-  ## Process participant ages: handle missing data ------------------------------
+  ## Process participant ages: handle missing data ----------------------------
   survey$participants <- drop_invalid_ages(
     participants = survey$participants,
     missing_action = missing_participant_age,
     age_limits = age_limits
   )
 
-  ## Process contact ages: handle missing data ----------------------------------
+  ## Process contact ages: handle missing data --------------------------------
   # remove contact ages below the age limit, before dealing with missing ages
   survey$contacts <- drop_ages_below_age_limit(
     data = survey$contacts,
@@ -127,7 +127,7 @@ assign_age_groups <- function(
     age_limits = age_limits
   )
 
-  ## assign contact age groups based on participant age groups ------------------
+  ## assign contact age groups based on participant age groups ----------------
   max_age <- max_participant_age(survey$participants)
   survey$contacts <- add_contact_age_groups(
     contacts = survey$contacts,
 
@@ -1,17 +1,69 @@
+#' Check that an age column (or its estimated fallbacks) exists
+#'
+#' @param df a data.frame to check
+#' @param age_column the primary age column name
+#' @param label a human-readable label for warning messages
+#'   (e.g. "participant" or "contact")
+#' @return `TRUE` if the column (or fallbacks) exist, `FALSE` otherwise
+#' @noRd
+check_age_column <- function(
+  df,
+  age_column,
+  label,
+  call = rlang::caller_env()
+) {
+  if (age_column %in% colnames(df)) {
+    return(TRUE)
+  }
+
+  exact_col <- paste(age_column, "exact", sep = "_")
+  min_col <- paste(age_column, "est_min", sep = "_")
+  max_col <- paste(age_column, "est_max", sep = "_")
+
+  if (
+    (exact_col %in% colnames(df)) ||
+      (min_col %in% colnames(df) && max_col %in% colnames(df))
+  ) {
+    return(TRUE)
+  }
+
+  cli::cli_warn(
+    "{label} age column {.arg {age_column}} or columns to \\
+    estimate {tolower(label)} age ({.arg {exact_col}} or \\
+    {.arg {min_col}} and {.arg {max_col}}) do not exist in \\
+    the {tolower(label)} data frame.",
+    call = call
+  )
+  FALSE
+}
+
 #' @export
 check <- function(x, ...) UseMethod("check")
 #' @name check
 #' @rdname check
 #' @title Check contact survey data
 #'
-#' @description Checks that a survey fulfills all the requirements to work with the 'contact_matrix' function
+#' @description Checks that a survey fulfills all the requirements
+#'   to work with the 'contact_matrix' function
 #'
 #' @param x A [survey()] object
-#' @param id.column the column in both the `participants` and `contacts` data frames that links contacts to participants
-#' @param participant.age.column the column in the `participants` data frame containing participants' age; if this does not exist, at least columns "..._exact", "..._est_min" and "..._est_max" must (see the `estimated.participant.age` option in [contact_matrix()])
-#' @param country.column the column in the `participants` data frame containing the country in which the participant was queried
-#' @param year.column the column in the `participants` data frame containing the year in which the participant was queried
-#' @param contact.age.column the column in the `contacts` data frame containing the age of contacts; if this does not exist, at least columns "..._exact", "..._est_min" and "..._est_max" must (see the `estimated.contact.age` option in [contact_matrix()])
+#' @param id.column the column in both the `participants` and
+#'   `contacts` data frames that links contacts to participants
+#' @param participant.age.column the column in the `participants`
+#'   data frame containing participants' age; if this does not
+#'   exist, at least columns "..._exact", "..._est_min" and
+#'   "..._est_max" must exist (see the
+#'   `estimated.participant.age` option in [contact_matrix()])
+#' @param country.column the column in the `participants` data
+#'   frame containing the country in which the participant was
+#'   queried
+#' @param year.column the column in the `participants` data frame
+#'   containing the year in which the participant was queried
+#' @param contact.age.column the column in the `contacts` data
+#'   frame containing the age of contacts; if this does not exist,
+#'   at least columns "..._exact", "..._est_min" and
+#'   "..._est_max" must exist (see the
+#'   `estimated.contact.age` option in [contact_matrix()])
 #' @param ... ignored
 #' @return invisibly returns a character vector of the relevant columns
 #' @examples
@@ -38,7 +90,8 @@ check.contact_survey <- function(
   chkDots(...)
   if (!is.data.frame(x$participants) || !is.data.frame(x$contacts)) {
     cli::cli_abort(
-      "The {.field participants} and {.field contacts} elements of {.arg x} must be data.frames."
+      "The {.field participants} and {.field contacts} elements of \\
+      {.arg x} must be data.frames."
     )
   }
 
@@ -51,50 +104,24 @@ check.contact_survey <- function(
       id.column %in% colnames(x$contacts))
   ) {
     cli::cli_warn(
-      "{.arg id.columns} {.val {id.column}} does not exist in both the
-   participants and contacts data frames."
+      "{.arg id.column} {.val {id.column}} does not exist in both \\
+      the participants and contacts data frames."
     )
     success <- FALSE
   }
 
-  if (!(participant.age.column %in% colnames(x$participants))) {
-    exact.column <- paste(participant.age.column, "exact", sep = "_")
-    min.column <- paste(participant.age.column, "est_min", sep = "_")
-    max.column <- paste(participant.age.column, "est_max", sep = "_")
-
-    if (
-      !((exact.column %in% colnames(x$participants)) ||
-        (min.column %in%
-          colnames(x$participants) &&
-          max.column %in% colnames(x$participants)))
-    ) {
-      cli::cli_warn(
-        "Participant age column {.arg {participant.age.column}} or columns to
-   estimate participant age ({.arg {exact.column}} or {.arg {min.column}}
-   and {.arg {max.column}}) do not exist in the participant data frame."
-      )
-      success <- FALSE
-    }
+  if (
+    !check_age_column(
+      x$participants,
+      participant.age.column,
+      "Participant"
+    )
+  ) {
+    success <- FALSE
   }
 
-  if (!(contact.age.column %in% colnames(x$contacts))) {
-    exact.column <- paste(contact.age.column, "exact", sep = "_")
-    min.column <- paste(contact.age.column, "est_min", sep = "_")
-    max.column <- paste(contact.age.column, "est_max", sep = "_")
-
-    if (
-      !((exact.column %in% colnames(x$contacts)) ||
-        (min.column %in%
-          colnames(x$contacts) &&
-          max.column %in% colnames(x$contacts)))
-    ) {
-      cli::cli_warn(
-        "Contact age column {.var {contact.age.column}} or columns to
-   estimate contact age ({.var {exact.column}} or {.var {min.column}}
-   and {.var {max.column}}) do not exist in the contact data frame."
-      )
-      success <- FALSE
-    }
+  if (!check_age_column(x$contacts, contact.age.column, "Contact")) {
+    success <- FALSE
   }
 
   if (!(country.column %in% colnames(x$participants))) {
 
@@ -135,6 +135,7 @@ check_na_in_weighted_matrix <- function(
 ) {
   if (na_in_weighted_matrix(weighted_matrix) && split) {
     ## construct a warning in case there are NAs
+    # nolint next: object_usage_linter. Used in cli interpolation.
     warning_suggestion <- build_na_warning(weighted_matrix)
     cli::cli_warn(
       message = c(
 
@@ -37,7 +37,8 @@ normalise_country_names <- function(countries) {
 #' @description Cleans survey data to work with the 'contact_matrix' function
 #'
 #' @param x A [survey()] object
-#' @param participant_age_column the column in `x$participants` containing participants' age
+#' @param participant_age_column the column in `x$participants`
+#'   containing participants' age
 #' @param ... ignored
 #' @param participant.age.column `r lifecycle::badge("deprecated")`
 #'   Use `participant_age_column` instead.
@@ -47,7 +48,7 @@ normalise_country_names <- function(countries) {
 #' @return a cleaned survey in the correct format
 #' @examples
 #' data(polymod)
-#' cleaned <- clean(polymod) # not really necessary as the 'polymod' data set has already been cleaned
+#' cleaned <- clean(polymod) # not really necessary, polymod is clean
 #' @autoglobal
 #' @export
 clean.contact_survey <- function(
 
@@ -252,7 +252,8 @@ filter_countries <- function(participants, countries) {
 #' @param data A data.table containing age data
 #' @param prefix Column name prefix: "part_age" for participants, "cnt_age" for
 #'   contacts
-#' @returns The data with the age column set from exact ages or initialised to NA
+#' @returns The data with the age column set from exact ages or
+#'   initialised to NA
 #' @autoglobal
 #' @keywords internal
 add_age <- function(data, prefix) {
@@ -448,7 +449,7 @@ add_upper_age_limits <- function(
 #' @autoglobal
 survey_pop_from_data <- function(survey_pop, part_age_group_present) {
   survey_pop <- data.table(survey_pop)
-  # make sure the maximum survey_pop age exceeds the participant age group breaks
+  # make sure max survey_pop age exceeds participant age group breaks
   if (max(survey_pop$lower.age.limit) < max(part_age_group_present)) {
     survey_pop <- rbind(
       survey_pop,
@@ -571,7 +572,7 @@ survey_pop_year <- function(
     survey_year <- survey_pop_info$survey_year
   } else {
     part_age_group_present <- get_age_group_lower_limits(age_limits)
-    # if survey_pop is a data frame with columns 'lower.age.limit' and 'population'
+    # survey_pop is a data frame with 'lower.age.limit' and 'population'
     survey_pop <- survey_pop_from_data(survey_pop, part_age_group_present)
 
     # add dummy survey_year
@@ -803,7 +804,7 @@ impute_age_by_sample <- function(contacts) {
       ## some contacts in the age group have an age, sample from these
       contacts <- sample_present_age(contacts, this_age_group)
     } else if (nrow(contacts[!is.na(cnt_age), ]) > 0) {
-      ## no contacts in the age group have an age, sample uniformly between limits
+      ## no contacts in the age group have an age, sample uniformly
       contacts <- sample_uniform_age(contacts, this_age_group)
     }
   }