Skip to content

Commit e547c73

Browse files
sbfnk-botsbfnk
andauthored
Enable all remaining linters (#294)
* Enable cyclocomp, line_length, object_usage, and indentation linters Co-authored-by: sbfnk <sebastian.funk@lshtm.ac.uk> * Address PR review comments and fix remaining lint violations Co-authored-by: sbfnk <sebastian.funk@lshtm.ac.uk> * Fix mispaired nolint blocks and remove unnecessary returns Co-authored-by: sbfnk <sebastian.funk@lshtm.ac.uk> * Forward call env in check_age_column and restore test qualifier Co-authored-by: sbfnk <sebastian.funk@lshtm.ac.uk> * Disable indentation_linter in favour of air formatter Co-authored-by: sbfnk <sebastian.funk@lshtm.ac.uk> * Reformat check_age_column signature per air formatter Co-authored-by: sbfnk <sebastian.funk@lshtm.ac.uk> * Fix backslash line continuations in cli_warn strings Co-authored-by: sbfnk <sebastian.funk@lshtm.ac.uk> * Fix roxygen wording and id.column warning typo Co-authored-by: sbfnk <sebastian.funk@lshtm.ac.uk> * Add cyclocomp to lint CI dependencies Co-authored-by: sbfnk <sebastian.funk@lshtm.ac.uk> --------- Co-authored-by: sbfnk <sebastian.funk@lshtm.ac.uk>
1 parent 253c381 commit e547c73

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+877
-376
lines changed

.github/workflows/lint-changed-files.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ jobs:
2121
- uses: r-lib/actions/setup-r-dependencies@v2
2222
with:
2323
extra-packages: |
24+
any::cyclocomp
2425
any::gh
2526
any::lintr
2627
any::purrr

.lintr

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,7 @@ linters: linters_with_tags(
55
implicit_integer_linter = NULL,
66
todo_comment_linter = NULL,
77
function_argument_linter = NULL,
8-
backport_linter("3.5.0"),
9-
# Extra exclusions for socialmixr.
10-
# These exclusions should ideally be removed at some point.
11-
line_length_linter = NULL,
12-
cyclocomp_linter = NULL,
13-
object_usage_linter = NULL,
14-
indentation_linter = NULL
8+
indentation_linter = NULL, # air handles indentation via style.yaml
9+
backport_linter("3.5.0")
1510
)
1611

NEWS.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
# socialmixr (development version)
22

3+
* Enabled `cyclocomp_linter`, `line_length_linter`, and `object_usage_linter`.
4+
Disabled `indentation_linter` (air handles indentation). Reduced cyclomatic
5+
complexity of `check.contact_survey()`, `[.contact_survey()`, and
6+
`find_unique_key()` by extracting helper functions.
7+
38
* New `[.contact_survey` method allows filtering survey objects with
49
expressions, e.g. `polymod[country == "United Kingdom"]` (#161).
510

R/as_contact_survey.R

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#' @title Check contact survey data
22
#'
3-
#' @description Checks that a survey fulfills all the requirements to work with the 'contact_matrix' function
3+
#' @description Checks that a survey fulfills all the requirements
4+
#' to work with the 'contact_matrix' function
45
#'
56
#' @param x list containing
67
#' - an element named 'participants', a data frame containing participant
@@ -9,12 +10,18 @@
910
#' - (optionally) an element named 'reference, a list containing information
1011
#' information needed to reference the survey, in particular it can contain$a
1112
#' "title", "bibtype", "author", "doi", "publisher", "note", "year"
12-
#' @param id_column the column in both the `participants` and `contacts` data frames that links contacts to participants
13-
#' @param country_column the column in the `participants` data frame containing the country in which the participant was queried; if NULL (default), will use "country" column if present
14-
#' @param year_column the column in the `participants` data frame containing the year in which the participant was queried; if NULL (default), will use "year" column if present
13+
#' @param id_column the column in both the `participants` and
14+
#' `contacts` data frames that links contacts to participants
15+
#' @param country_column the column in the `participants` data frame
16+
#' containing the country in which the participant was queried; if
17+
#' NULL (default), will use "country" column if present
18+
#' @param year_column the column in the `participants` data frame
19+
#' containing the year in which the participant was queried; if
20+
#' NULL (default), will use "year" column if present
1521
#' @param ... additional arguments (currently ignored)
16-
#' @param id.column,country.column,year.column `r lifecycle::badge("deprecated")`
17-
#' Use the underscore versions (e.g., `id_column`) instead.
22+
#' @param id.column,country.column,year.column
23+
#' `r lifecycle::badge("deprecated")` Use the underscore versions
24+
#' (e.g., `id_column`) instead.
1825
#' @importFrom checkmate assert_list assert_names assert_data_frame
1926
#' assert_character
2027
#' @importFrom purrr walk

R/assign-age-groups.R

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ assign_age_groups <- function(
7070
survey$participants <- add_part_age(survey$participants)
7171
survey$contacts <- add_contact_age(survey$contacts)
7272

73-
## Impute participant ages from ranges ----------------------------------------
73+
## Impute participant ages from ranges ------------------------------------
7474
survey$participants <- impute_participant_ages(
7575
participants = survey$participants,
7676
estimate = estimated_participant_age
@@ -87,7 +87,7 @@ assign_age_groups <- function(
8787
)
8888
)
8989

90-
## Impute contact ages from ranges --------------------------------------------
90+
## Impute contact ages from ranges ------------------------------------------
9191
survey$contacts <- impute_contact_ages(
9292
contacts = survey$contacts,
9393
estimate = estimated_contact_age
@@ -96,14 +96,14 @@ assign_age_groups <- function(
9696
# define age limits if not given
9797
age_limits <- age_limits %||% get_age_limits(survey)
9898

99-
## Process participant ages: handle missing data ------------------------------
99+
## Process participant ages: handle missing data ----------------------------
100100
survey$participants <- drop_invalid_ages(
101101
participants = survey$participants,
102102
missing_action = missing_participant_age,
103103
age_limits = age_limits
104104
)
105105

106-
## Process contact ages: handle missing data ----------------------------------
106+
## Process contact ages: handle missing data --------------------------------
107107
# remove contact ages below the age limit, before dealing with missing ages
108108
survey$contacts <- drop_ages_below_age_limit(
109109
data = survey$contacts,
@@ -127,7 +127,7 @@ assign_age_groups <- function(
127127
age_limits = age_limits
128128
)
129129

130-
## assign contact age groups based on participant age groups ------------------
130+
## assign contact age groups based on participant age groups ----------------
131131
max_age <- max_participant_age(survey$participants)
132132
survey$contacts <- add_contact_age_groups(
133133
contacts = survey$contacts,

R/check.R

Lines changed: 72 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,69 @@
1+
#' Check that an age column (or its estimated fallbacks) exists
2+
#'
3+
#' @param df a data.frame to check
4+
#' @param age_column the primary age column name
5+
#' @param label a human-readable label for warning messages
6+
#' (e.g. "participant" or "contact")
7+
#' @return `TRUE` if the column (or fallbacks) exist, `FALSE` otherwise
8+
#' @noRd
9+
check_age_column <- function(
10+
df,
11+
age_column,
12+
label,
13+
call = rlang::caller_env()
14+
) {
15+
if (age_column %in% colnames(df)) {
16+
return(TRUE)
17+
}
18+
19+
exact_col <- paste(age_column, "exact", sep = "_")
20+
min_col <- paste(age_column, "est_min", sep = "_")
21+
max_col <- paste(age_column, "est_max", sep = "_")
22+
23+
if (
24+
(exact_col %in% colnames(df)) ||
25+
(min_col %in% colnames(df) && max_col %in% colnames(df))
26+
) {
27+
return(TRUE)
28+
}
29+
30+
cli::cli_warn(
31+
"{label} age column {.arg {age_column}} or columns to \\
32+
estimate {tolower(label)} age ({.arg {exact_col}} or \\
33+
{.arg {min_col}} and {.arg {max_col}}) do not exist in \\
34+
the {tolower(label)} data frame.",
35+
call = call
36+
)
37+
FALSE
38+
}
39+
140
#' @export
241
check <- function(x, ...) UseMethod("check")
342
#' @name check
443
#' @rdname check
544
#' @title Check contact survey data
645
#'
7-
#' @description Checks that a survey fulfills all the requirements to work with the 'contact_matrix' function
46+
#' @description Checks that a survey fulfills all the requirements
47+
#' to work with the 'contact_matrix' function
848
#'
949
#' @param x A [survey()] object
10-
#' @param id.column the column in both the `participants` and `contacts` data frames that links contacts to participants
11-
#' @param participant.age.column the column in the `participants` data frame containing participants' age; if this does not exist, at least columns "..._exact", "..._est_min" and "..._est_max" must (see the `estimated.participant.age` option in [contact_matrix()])
12-
#' @param country.column the column in the `participants` data frame containing the country in which the participant was queried
13-
#' @param year.column the column in the `participants` data frame containing the year in which the participant was queried
14-
#' @param contact.age.column the column in the `contacts` data frame containing the age of contacts; if this does not exist, at least columns "..._exact", "..._est_min" and "..._est_max" must (see the `estimated.contact.age` option in [contact_matrix()])
50+
#' @param id.column the column in both the `participants` and
51+
#' `contacts` data frames that links contacts to participants
52+
#' @param participant.age.column the column in the `participants`
53+
#' data frame containing participants' age; if this does not
54+
#' exist, at least columns "..._exact", "..._est_min" and
55+
#' "..._est_max" must exist (see the
56+
#' `estimated.participant.age` option in [contact_matrix()])
57+
#' @param country.column the column in the `participants` data
58+
#' frame containing the country in which the participant was
59+
#' queried
60+
#' @param year.column the column in the `participants` data frame
61+
#' containing the year in which the participant was queried
62+
#' @param contact.age.column the column in the `contacts` data
63+
#' frame containing the age of contacts; if this does not exist,
64+
#' at least columns "..._exact", "..._est_min" and
65+
#' "..._est_max" must exist (see the
66+
#' `estimated.contact.age` option in [contact_matrix()])
1567
#' @param ... ignored
1668
#' @return invisibly returns a character vector of the relevant columns
1769
#' @examples
@@ -38,7 +90,8 @@ check.contact_survey <- function(
3890
chkDots(...)
3991
if (!is.data.frame(x$participants) || !is.data.frame(x$contacts)) {
4092
cli::cli_abort(
41-
"The {.field participants} and {.field contacts} elements of {.arg x} must be data.frames."
93+
"The {.field participants} and {.field contacts} elements of \\
94+
{.arg x} must be data.frames."
4295
)
4396
}
4497

@@ -51,50 +104,24 @@ check.contact_survey <- function(
51104
id.column %in% colnames(x$contacts))
52105
) {
53106
cli::cli_warn(
54-
"{.arg id.columns} {.val {id.column}} does not exist in both the
55-
participants and contacts data frames."
107+
"{.arg id.column} {.val {id.column}} does not exist in both \\
108+
the participants and contacts data frames."
56109
)
57110
success <- FALSE
58111
}
59112

60-
if (!(participant.age.column %in% colnames(x$participants))) {
61-
exact.column <- paste(participant.age.column, "exact", sep = "_")
62-
min.column <- paste(participant.age.column, "est_min", sep = "_")
63-
max.column <- paste(participant.age.column, "est_max", sep = "_")
64-
65-
if (
66-
!((exact.column %in% colnames(x$participants)) ||
67-
(min.column %in%
68-
colnames(x$participants) &&
69-
max.column %in% colnames(x$participants)))
70-
) {
71-
cli::cli_warn(
72-
"Participant age column {.arg {participant.age.column}} or columns to
73-
estimate participant age ({.arg {exact.column}} or {.arg {min.column}}
74-
and {.arg {max.column}}) do not exist in the participant data frame."
75-
)
76-
success <- FALSE
77-
}
113+
if (
114+
!check_age_column(
115+
x$participants,
116+
participant.age.column,
117+
"Participant"
118+
)
119+
) {
120+
success <- FALSE
78121
}
79122

80-
if (!(contact.age.column %in% colnames(x$contacts))) {
81-
exact.column <- paste(contact.age.column, "exact", sep = "_")
82-
min.column <- paste(contact.age.column, "est_min", sep = "_")
83-
max.column <- paste(contact.age.column, "est_max", sep = "_")
84-
85-
if (
86-
!((exact.column %in% colnames(x$contacts)) ||
87-
(min.column %in%
88-
colnames(x$contacts) &&
89-
max.column %in% colnames(x$contacts)))
90-
) {
91-
cli::cli_warn(
92-
"Contact age column {.var {contact.age.column}} or columns to
93-
estimate contact age ({.var {exact.column}} or {.var {min.column}}
94-
and {.var {max.column}}) do not exist in the contact data frame."
95-
)
96-
success <- FALSE
97-
}
123+
if (!check_age_column(x$contacts, contact.age.column, "Contact")) {
124+
success <- FALSE
98125
}
99126

100127
if (!(country.column %in% colnames(x$participants))) {

R/checkers.R

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ check_na_in_weighted_matrix <- function(
135135
) {
136136
if (na_in_weighted_matrix(weighted_matrix) && split) {
137137
## construct a warning in case there are NAs
138+
# nolint next: object_usage_linter. Used in cli interpolation.
138139
warning_suggestion <- build_na_warning(weighted_matrix)
139140
cli::cli_warn(
140141
message = c(

R/clean.R

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,8 @@ normalise_country_names <- function(countries) {
3737
#' @description Cleans survey data to work with the 'contact_matrix' function
3838
#'
3939
#' @param x A [survey()] object
40-
#' @param participant_age_column the column in `x$participants` containing participants' age
40+
#' @param participant_age_column the column in `x$participants`
41+
#' containing participants' age
4142
#' @param ... ignored
4243
#' @param participant.age.column `r lifecycle::badge("deprecated")`
4344
#' Use `participant_age_column` instead.
@@ -47,7 +48,7 @@ normalise_country_names <- function(countries) {
4748
#' @return a cleaned survey in the correct format
4849
#' @examples
4950
#' data(polymod)
50-
#' cleaned <- clean(polymod) # not really necessary as the 'polymod' data set has already been cleaned
51+
#' cleaned <- clean(polymod) # not really necessary, polymod is clean
5152
#' @autoglobal
5253
#' @export
5354
clean.contact_survey <- function(

R/contact-matrix-utils.R

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,8 @@ filter_countries <- function(participants, countries) {
252252
#' @param data A data.table containing age data
253253
#' @param prefix Column name prefix: "part_age" for participants, "cnt_age" for
254254
#' contacts
255-
#' @returns The data with the age column set from exact ages or initialised to NA
255+
#' @returns The data with the age column set from exact ages or
256+
#' initialised to NA
256257
#' @autoglobal
257258
#' @keywords internal
258259
add_age <- function(data, prefix) {
@@ -448,7 +449,7 @@ add_upper_age_limits <- function(
448449
#' @autoglobal
449450
survey_pop_from_data <- function(survey_pop, part_age_group_present) {
450451
survey_pop <- data.table(survey_pop)
451-
# make sure the maximum survey_pop age exceeds the participant age group breaks
452+
# make sure max survey_pop age exceeds participant age group breaks
452453
if (max(survey_pop$lower.age.limit) < max(part_age_group_present)) {
453454
survey_pop <- rbind(
454455
survey_pop,
@@ -571,7 +572,7 @@ survey_pop_year <- function(
571572
survey_year <- survey_pop_info$survey_year
572573
} else {
573574
part_age_group_present <- get_age_group_lower_limits(age_limits)
574-
# if survey_pop is a data frame with columns 'lower.age.limit' and 'population'
575+
# survey_pop is a data frame with 'lower.age.limit' and 'population'
575576
survey_pop <- survey_pop_from_data(survey_pop, part_age_group_present)
576577

577578
# add dummy survey_year
@@ -803,7 +804,7 @@ impute_age_by_sample <- function(contacts) {
803804
## some contacts in the age group have an age, sample from these
804805
contacts <- sample_present_age(contacts, this_age_group)
805806
} else if (nrow(contacts[!is.na(cnt_age), ]) > 0) {
806-
## no contacts in the age group have an age, sample uniformly between limits
807+
## no contacts in the age group have an age, sample uniformly
807808
contacts <- sample_uniform_age(contacts, this_age_group)
808809
}
809810
}

0 commit comments

Comments
 (0)