Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/lint-changed-files.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ jobs:
- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: |
any::cyclocomp
any::gh
any::lintr
any::purrr
Expand Down
9 changes: 2 additions & 7 deletions .lintr
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,7 @@ linters: linters_with_tags(
implicit_integer_linter = NULL,
todo_comment_linter = NULL,
function_argument_linter = NULL,
backport_linter("3.5.0"),
# Extra exclusions for socialmixr.
# These exclusions should ideally be removed at some point.
line_length_linter = NULL,
cyclocomp_linter = NULL,
object_usage_linter = NULL,
indentation_linter = NULL
indentation_linter = NULL, # air handles indentation via style.yaml
backport_linter("3.5.0")
)

5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# socialmixr (development version)

* Enabled `cyclocomp_linter`, `line_length_linter`, and `object_usage_linter`.
Disabled `indentation_linter` (air handles indentation). Reduced cyclomatic
complexity of `check.contact_survey()`, `[.contact_survey()`, and
`find_unique_key()` by extracting helper functions.

* New `[.contact_survey` method allows filtering survey objects with
expressions, e.g. `polymod[country == "United Kingdom"]` (#161).

Expand Down
19 changes: 13 additions & 6 deletions R/as_contact_survey.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#' @title Check contact survey data
#'
#' @description Checks that a survey fulfills all the requirements to work with the 'contact_matrix' function
#' @description Checks that a survey fulfills all the requirements
#' to work with the 'contact_matrix' function
#'
#' @param x list containing
#' - an element named 'participants', a data frame containing participant
Expand All @@ -9,12 +10,18 @@
#' - (optionally) an element named 'reference, a list containing information
#' information needed to reference the survey, in particular it can contain$a
#' "title", "bibtype", "author", "doi", "publisher", "note", "year"
#' @param id_column the column in both the `participants` and `contacts` data frames that links contacts to participants
#' @param country_column the column in the `participants` data frame containing the country in which the participant was queried; if NULL (default), will use "country" column if present
#' @param year_column the column in the `participants` data frame containing the year in which the participant was queried; if NULL (default), will use "year" column if present
#' @param id_column the column in both the `participants` and
#' `contacts` data frames that links contacts to participants
#' @param country_column the column in the `participants` data frame
#' containing the country in which the participant was queried; if
#' NULL (default), will use "country" column if present
#' @param year_column the column in the `participants` data frame
#' containing the year in which the participant was queried; if
#' NULL (default), will use "year" column if present
#' @param ... additional arguments (currently ignored)
#' @param id.column,country.column,year.column `r lifecycle::badge("deprecated")`
#' Use the underscore versions (e.g., `id_column`) instead.
#' @param id.column,country.column,year.column
#' `r lifecycle::badge("deprecated")` Use the underscore versions
#' (e.g., `id_column`) instead.
#' @importFrom checkmate assert_list assert_names assert_data_frame
#' assert_character
#' @importFrom purrr walk
Expand Down
10 changes: 5 additions & 5 deletions R/assign-age-groups.R
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ assign_age_groups <- function(
survey$participants <- add_part_age(survey$participants)
survey$contacts <- add_contact_age(survey$contacts)

## Impute participant ages from ranges ----------------------------------------
## Impute participant ages from ranges ------------------------------------
survey$participants <- impute_participant_ages(
participants = survey$participants,
estimate = estimated_participant_age
Expand All @@ -87,7 +87,7 @@ assign_age_groups <- function(
)
)

## Impute contact ages from ranges --------------------------------------------
## Impute contact ages from ranges ------------------------------------------
survey$contacts <- impute_contact_ages(
contacts = survey$contacts,
estimate = estimated_contact_age
Expand All @@ -96,14 +96,14 @@ assign_age_groups <- function(
# define age limits if not given
age_limits <- age_limits %||% get_age_limits(survey)

## Process participant ages: handle missing data ------------------------------
## Process participant ages: handle missing data ----------------------------
survey$participants <- drop_invalid_ages(
participants = survey$participants,
missing_action = missing_participant_age,
age_limits = age_limits
)

## Process contact ages: handle missing data ----------------------------------
## Process contact ages: handle missing data --------------------------------
# remove contact ages below the age limit, before dealing with missing ages
survey$contacts <- drop_ages_below_age_limit(
data = survey$contacts,
Expand All @@ -127,7 +127,7 @@ assign_age_groups <- function(
age_limits = age_limits
)

## assign contact age groups based on participant age groups ------------------
## assign contact age groups based on participant age groups ----------------
max_age <- max_participant_age(survey$participants)
survey$contacts <- add_contact_age_groups(
contacts = survey$contacts,
Expand Down
117 changes: 72 additions & 45 deletions R/check.R
Original file line number Diff line number Diff line change
@@ -1,17 +1,69 @@
#' Check that an age column (or its estimated fallbacks) exists
#'
#' @param df a data.frame to check
#' @param age_column the primary age column name
#' @param label a human-readable label for warning messages
#' (e.g. "participant" or "contact")
#' @return `TRUE` if the column (or fallbacks) exist, `FALSE` otherwise
#' @noRd
check_age_column <- function(
df,
age_column,
label,
call = rlang::caller_env()
) {
if (age_column %in% colnames(df)) {
return(TRUE)
}

exact_col <- paste(age_column, "exact", sep = "_")
min_col <- paste(age_column, "est_min", sep = "_")
max_col <- paste(age_column, "est_max", sep = "_")

if (
(exact_col %in% colnames(df)) ||
(min_col %in% colnames(df) && max_col %in% colnames(df))
) {
return(TRUE)
}

cli::cli_warn(
"{label} age column {.arg {age_column}} or columns to \\
estimate {tolower(label)} age ({.arg {exact_col}} or \\
{.arg {min_col}} and {.arg {max_col}}) do not exist in \\
the {tolower(label)} data frame.",
call = call
)
FALSE
}

#' @export
check <- function(x, ...) UseMethod("check")
#' @name check
#' @rdname check
#' @title Check contact survey data
#'
#' @description Checks that a survey fulfills all the requirements to work with the 'contact_matrix' function
#' @description Checks that a survey fulfills all the requirements
#' to work with the 'contact_matrix' function
#'
#' @param x A [survey()] object
#' @param id.column the column in both the `participants` and `contacts` data frames that links contacts to participants
#' @param participant.age.column the column in the `participants` data frame containing participants' age; if this does not exist, at least columns "..._exact", "..._est_min" and "..._est_max" must (see the `estimated.participant.age` option in [contact_matrix()])
#' @param country.column the column in the `participants` data frame containing the country in which the participant was queried
#' @param year.column the column in the `participants` data frame containing the year in which the participant was queried
#' @param contact.age.column the column in the `contacts` data frame containing the age of contacts; if this does not exist, at least columns "..._exact", "..._est_min" and "..._est_max" must (see the `estimated.contact.age` option in [contact_matrix()])
#' @param id.column the column in both the `participants` and
#' `contacts` data frames that links contacts to participants
#' @param participant.age.column the column in the `participants`
#' data frame containing participants' age; if this does not
#' exist, at least columns "..._exact", "..._est_min" and
#' "..._est_max" must exist (see the
#' `estimated.participant.age` option in [contact_matrix()])
#' @param country.column the column in the `participants` data
#' frame containing the country in which the participant was
#' queried
#' @param year.column the column in the `participants` data frame
#' containing the year in which the participant was queried
#' @param contact.age.column the column in the `contacts` data
#' frame containing the age of contacts; if this does not exist,
#' at least columns "..._exact", "..._est_min" and
#' "..._est_max" must exist (see the
#' `estimated.contact.age` option in [contact_matrix()])
#' @param ... ignored
#' @return invisibly returns a character vector of the relevant columns
#' @examples
Expand All @@ -38,7 +90,8 @@ check.contact_survey <- function(
chkDots(...)
if (!is.data.frame(x$participants) || !is.data.frame(x$contacts)) {
cli::cli_abort(
"The {.field participants} and {.field contacts} elements of {.arg x} must be data.frames."
"The {.field participants} and {.field contacts} elements of \\
{.arg x} must be data.frames."
)
}

Expand All @@ -51,50 +104,24 @@ check.contact_survey <- function(
id.column %in% colnames(x$contacts))
) {
cli::cli_warn(
"{.arg id.columns} {.val {id.column}} does not exist in both the
participants and contacts data frames."
"{.arg id.column} {.val {id.column}} does not exist in both \\
the participants and contacts data frames."
)
success <- FALSE
}

if (!(participant.age.column %in% colnames(x$participants))) {
exact.column <- paste(participant.age.column, "exact", sep = "_")
min.column <- paste(participant.age.column, "est_min", sep = "_")
max.column <- paste(participant.age.column, "est_max", sep = "_")

if (
!((exact.column %in% colnames(x$participants)) ||
(min.column %in%
colnames(x$participants) &&
max.column %in% colnames(x$participants)))
) {
cli::cli_warn(
"Participant age column {.arg {participant.age.column}} or columns to
estimate participant age ({.arg {exact.column}} or {.arg {min.column}}
and {.arg {max.column}}) do not exist in the participant data frame."
)
success <- FALSE
}
if (
!check_age_column(
x$participants,
participant.age.column,
"Participant"
)
) {
success <- FALSE
}

if (!(contact.age.column %in% colnames(x$contacts))) {
exact.column <- paste(contact.age.column, "exact", sep = "_")
min.column <- paste(contact.age.column, "est_min", sep = "_")
max.column <- paste(contact.age.column, "est_max", sep = "_")

if (
!((exact.column %in% colnames(x$contacts)) ||
(min.column %in%
colnames(x$contacts) &&
max.column %in% colnames(x$contacts)))
) {
cli::cli_warn(
"Contact age column {.var {contact.age.column}} or columns to
estimate contact age ({.var {exact.column}} or {.var {min.column}}
and {.var {max.column}}) do not exist in the contact data frame."
)
success <- FALSE
}
if (!check_age_column(x$contacts, contact.age.column, "Contact")) {
success <- FALSE
}

if (!(country.column %in% colnames(x$participants))) {
Expand Down
1 change: 1 addition & 0 deletions R/checkers.R
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ check_na_in_weighted_matrix <- function(
) {
if (na_in_weighted_matrix(weighted_matrix) && split) {
## construct a warning in case there are NAs
# nolint next: object_usage_linter. Used in cli interpolation.
warning_suggestion <- build_na_warning(weighted_matrix)
cli::cli_warn(
message = c(
Expand Down
5 changes: 3 additions & 2 deletions R/clean.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ normalise_country_names <- function(countries) {
#' @description Cleans survey data to work with the 'contact_matrix' function
#'
#' @param x A [survey()] object
#' @param participant_age_column the column in `x$participants` containing participants' age
#' @param participant_age_column the column in `x$participants`
#' containing participants' age
#' @param ... ignored
#' @param participant.age.column `r lifecycle::badge("deprecated")`
#' Use `participant_age_column` instead.
Expand All @@ -47,7 +48,7 @@ normalise_country_names <- function(countries) {
#' @return a cleaned survey in the correct format
#' @examples
#' data(polymod)
#' cleaned <- clean(polymod) # not really necessary as the 'polymod' data set has already been cleaned
#' cleaned <- clean(polymod) # not really necessary, polymod is clean
#' @autoglobal
#' @export
clean.contact_survey <- function(
Expand Down
9 changes: 5 additions & 4 deletions R/contact-matrix-utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,8 @@ filter_countries <- function(participants, countries) {
#' @param data A data.table containing age data
#' @param prefix Column name prefix: "part_age" for participants, "cnt_age" for
#' contacts
#' @returns The data with the age column set from exact ages or initialised to NA
#' @returns The data with the age column set from exact ages or
#' initialised to NA
#' @autoglobal
#' @keywords internal
add_age <- function(data, prefix) {
Expand Down Expand Up @@ -448,7 +449,7 @@ add_upper_age_limits <- function(
#' @autoglobal
survey_pop_from_data <- function(survey_pop, part_age_group_present) {
survey_pop <- data.table(survey_pop)
# make sure the maximum survey_pop age exceeds the participant age group breaks
# make sure max survey_pop age exceeds participant age group breaks
if (max(survey_pop$lower.age.limit) < max(part_age_group_present)) {
survey_pop <- rbind(
survey_pop,
Expand Down Expand Up @@ -571,7 +572,7 @@ survey_pop_year <- function(
survey_year <- survey_pop_info$survey_year
} else {
part_age_group_present <- get_age_group_lower_limits(age_limits)
# if survey_pop is a data frame with columns 'lower.age.limit' and 'population'
# survey_pop is a data frame with 'lower.age.limit' and 'population'
survey_pop <- survey_pop_from_data(survey_pop, part_age_group_present)

# add dummy survey_year
Expand Down Expand Up @@ -803,7 +804,7 @@ impute_age_by_sample <- function(contacts) {
## some contacts in the age group have an age, sample from these
contacts <- sample_present_age(contacts, this_age_group)
} else if (nrow(contacts[!is.na(cnt_age), ]) > 0) {
## no contacts in the age group have an age, sample uniformly between limits
## no contacts in the age group have an age, sample uniformly
contacts <- sample_uniform_age(contacts, this_age_group)
}
}
Expand Down
Loading