steno-aarhus
diff --git a/‎.github/workflows/build.yaml‎
Lines changed: 2 additions & 0 deletions b/‎.github/workflows/build.yaml‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎DESCRIPTION‎
Lines changed: 3 additions & 1 deletion b/‎DESCRIPTION‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎R/add.R‎
Lines changed: 7 additions & 3 deletions b/‎R/add.R‎
Lines changed: 7 additions & 3 deletions
diff --git a/‎R/algorithm.R‎
Lines changed: 5 additions & 5 deletions b/‎R/algorithm.R‎
Lines changed: 5 additions & 5 deletions
diff --git a/‎R/classify-diabetes.R‎
Lines changed: 38 additions & 37 deletions b/‎R/classify-diabetes.R‎
Lines changed: 38 additions & 37 deletions
diff --git a/‎R/create-inclusion-dates.R‎
Lines changed: 1 addition & 1 deletion b/‎R/create-inclusion-dates.R‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎R/dates.R‎
Lines changed: 9 additions & 7 deletions b/‎R/dates.R‎
Lines changed: 9 additions & 7 deletions
diff --git a/‎R/drop.R‎
Lines changed: 5 additions & 14 deletions b/‎R/drop.R‎
Lines changed: 5 additions & 14 deletions
diff --git a/‎R/edge-cases.R‎
Lines changed: 3 additions & 4 deletions b/‎R/edge-cases.R‎
Lines changed: 3 additions & 4 deletions
diff --git a/‎R/join-inclusions.R‎
Lines changed: 1 addition & 1 deletion b/‎R/join-inclusions.R‎
Lines changed: 1 addition & 1 deletion
@@ -48,6 +48,8 @@ jobs:
     env:
       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
       R_KEEP_PKG_SOURCE: yes
+      # Lets tests detect the R-devel job so they can skip themselves there.
+      DEVELOP_R: ${{ matrix.config.r == 'devel' }}
 
     steps:
       - uses: actions/checkout@v4
 
@@ -28,6 +28,7 @@ Imports:
     checkmate,
     cli,
     codeCollection,
+    dbplyr,
     dplyr,
     duckplyr,
     fabricatr,
@@ -37,7 +38,8 @@ Imports:
     rlang,
     rvest,
     stats,
-    tidyselect
+    tidyselect,
+    utils
 Suggests:
     glue,
     knitr,
 
@@ -30,19 +30,23 @@ add_insulin_purchases_cols <- function(gld_hba1c_after_drop_steps) {
     # number of packages purchased
     dplyr::mutate(
       contained_doses = .data$volume * .data$apk,
-      is_insulin_gld_code = !!logic$is_insulin_gld_code,
-      date = as_date(date)
+      is_insulin_gld_code = !!logic$is_insulin_gld_code
     ) |>
     dplyr::select(
       "pnr",
       "date",
       "contained_doses",
       "is_insulin_gld_code"
     ) |>
-    dplyr::summarise(
+    dplyr::mutate(
+      # Needs to be done beforehand: when translated to SQL, a variable can't
+      # be created and then reused within the same `summarise()`.
       # Get first date of a GLD purchase and if a purchase of insulin occurs
       # within 180 days of the first purchase.
       first_gld_date = min(date, na.rm = TRUE),
+      .by = "pnr"
+    ) |>
+    dplyr::summarise(
       has_insulin_purchases_within_180_days = !!logic$has_insulin_purchases_within_180_days,
       # Sum up total doses of insulin and of all GLD.
       n_insulin_doses = sum(
 
@@ -68,13 +68,13 @@ algorithm <- function() {
     lpr2_is_endocrinology_dept = list(
       register = "lpr_adm",
       title = "LPR2 endocrinology department",
-      logic = "c_spec == 8",
+      logic = "c_spec == 8L",
       comments = "`TRUE` when the department where the recorded diagnosis was endocrinology."
     ),
     lpr2_is_medical_dept = list(
       register = "lpr_adm",
       title = "LPR2 other medical department",
-      logic = "c_spec %in% c(1:7, 9:30)",
+      logic = "c_spec %in% c(1L:7L, 9L:30L)",
       comments = "`TRUE` when the diagnosis was recorded at a medical department other than endocrinology."
     ),
     lpr2_is_pregnancy_code = list(
@@ -141,7 +141,7 @@ algorithm <- function() {
     is_within_pregnancy_interval = list(
       register = NA,
       title = "Events that are within a potential pregnancy interval",
-      logic = "has_pregnancy_event AND date >= (pregnancy_event_date - weeks(40)) AND date <= (pregnancy_event_date + weeks(12))",
+      logic = "has_pregnancy_event AND date >= (pregnancy_event_date - weeks(40L)) AND date <= (pregnancy_event_date + weeks(12L))",
       comments = "The potential pregnancy interval is defined as 40 weeks before and 12 weeks after the pregnancy event date (birth or miscarriage)."
     ),
     is_podiatrist_services = list(
@@ -153,7 +153,7 @@ algorithm <- function() {
     is_not_metformin_for_pcos = list(
       register = NA,
       title = "Metformin purchases that aren't potentially for the treatment of PCOS",
-      logic = "NOT (koen == 2 AND atc =~ '^A10BA02$' AND ((date - foed_dato) < years(40) OR indication_code %in% c('0000092', '0000276', '0000781')))",
+      logic = "NOT (koen == 2 AND atc =~ '^A10BA02$' AND (date < (foed_dato + years(40)) OR indication_code %in% c('0000092', '0000276', '0000781')))",
       comments = "Woman is defined as 2 in `koen`."
     ),
     has_t1d = list(
@@ -189,7 +189,7 @@ algorithm <- function() {
     has_insulin_purchases_within_180_days = list(
       register = NA,
       title = "Whether any insulin was purchased within 180 days of the first purchase of GLD",
-      logic = "any(is_insulin_gld_code & date <= (first_gld_date + days(180)))",
+      logic = "any(is_insulin_gld_code & date <= (first_gld_date + days(180L)), na.rm = TRUE)",
       comments = "This is used to classify type 1 diabetes. It determines if any insulin was bought shortly after first buying any type of GLD, which suggests type 1 diabetes."
     )
   )
 
@@ -31,20 +31,14 @@
 #'   description of the internal implementation of this classification function.
 #'
 #' @examples
-#' register_data <- simulate_registers(
-#'   c(
-#'     "kontakter",
-#'     "diagnoser",
-#'     "lpr_diag",
-#'     "lpr_adm",
-#'     "sysi",
-#'     "sssy",
-#'     "lab_forsker",
-#'     "bef",
-#'     "lmdb"
-#'   ),
-#'   n = 10000
-#' )
+#' # Can't run this multiple times, will cause an error as the table
+#' # has already been created in the DuckDB connection.
+#' register_data <- registers() |>
+#'   names() |>
+#'   simulate_registers() |>
+#'   purrr::map(duckplyr::as_duckdb_tibble) |>
+#'   purrr::map(duckplyr::as_tbl)
+#'
 #' classify_diabetes(
 #'   kontakter = register_data$kontakter,
 #'   diagnoser = register_data$diagnoser,
@@ -69,26 +63,35 @@ classify_diabetes <- function(
   stable_inclusion_start_date = "1998-01-01"
 ) {
   # Input checks -----
-  check_is_duckdb(kontakter)
-  check_is_duckdb(diagnoser)
-  check_is_duckdb(lpr_diag)
-  check_is_duckdb(lpr_adm)
-  check_is_duckdb(sysi)
-  check_is_duckdb(sssy)
-  check_is_duckdb(lab_forsker)
-  check_is_duckdb(bef)
-  check_is_duckdb(lmdb)
+
+  # Convert to dbplyr connection with duckdb to use dbplyr functions
+  # (since duckplyr is still in development).
+  # Also need to convert here rather than as a function, because of the
+  # way duckplyr works. It creates a temporary DuckDB DB in the background
+  # based on the name of the object passed to it.
+  registers <- list(
+    kontakter = kontakter,
+    diagnoser = diagnoser,
+    lpr_diag = lpr_diag,
+    lpr_adm = lpr_adm,
+    sysi = sysi,
+    sssy = sssy,
+    lab_forsker = lab_forsker,
+    bef = bef,
+    lmdb = lmdb
+  ) |>
+    purrr::map(verify_duckdb)
 
   # Verification step -----
-  kontakter <- select_required_variables(kontakter, "kontakter")
-  diagnoser <- select_required_variables(diagnoser, "diagnoser")
-  lpr_diag <- select_required_variables(lpr_diag, "lpr_diag")
-  lpr_adm <- select_required_variables(lpr_adm, "lpr_adm")
-  sysi <- select_required_variables(sysi, "sysi")
-  sssy <- select_required_variables(sssy, "sssy")
-  lab_forsker <- select_required_variables(lab_forsker, "lab_forsker")
-  bef <- select_required_variables(bef, "bef")
-  lmdb <- select_required_variables(lmdb, "lmdb")
+  kontakter <- select_required_variables(registers$kontakter, "kontakter")
+  diagnoser <- select_required_variables(registers$diagnoser, "diagnoser")
+  lpr_diag <- select_required_variables(registers$lpr_diag, "lpr_diag")
+  lpr_adm <- select_required_variables(registers$lpr_adm, "lpr_adm")
+  sysi <- select_required_variables(registers$sysi, "sysi")
+  sssy <- select_required_variables(registers$sssy, "sssy")
+  lab_forsker <- select_required_variables(registers$lab_forsker, "lab_forsker")
+  bef <- select_required_variables(registers$bef, "bef")
+  lmdb <- select_required_variables(registers$lmdb, "lmdb")
 
   # Initially processing -----
   lpr2 <- prepare_lpr2(
@@ -178,27 +181,25 @@ classify_diabetes <- function(
     )
 }
 
-check_is_duckdb <- function(data, call = rlang::caller_env()) {
+verify_duckdb <- function(data, call = rlang::caller_env()) {
   check <- checkmate::test_multi_class(
     data,
     classes = c(
       "tbl_duckdb_connection",
-      "duckplyr_df",
-      "duckplyr_tbl",
       "duckdb_connection"
     )
   )
   if (!check) {
     cli::cli_abort(
       message = c(
-        "The data needs to be a DuckDB object because we heavily process the data.",
+        "The data needs to be a {.cls tbl_duckdb_connection} object because we heavily process the data and need the power.",
         "i" = "The data has the class{?es}: {.code {class(data)}}"
       ),
       call = call
     )
   }
 
-  invisible(NULL)
+  data
 }
 
 #' After filtering, classify those with type 1 diabetes.
 
@@ -42,7 +42,7 @@ create_inclusion_dates <- function(
       # Set the stable inclusion date to NA if the raw inclusion date is before
       # stable_inclusion_start_date.
       stable_inclusion_date = dplyr::if_else(
-        .data$raw_inclusion_date < as_date(stable_inclusion_start_date),
+        .data$raw_inclusion_date < as.Date(stable_inclusion_start_date),
         NA,
         .data$raw_inclusion_date
       )
 
@@ -1,12 +1,14 @@
-#' Simple `as.Date()` wrapper.
+#' Create SQL to parse strings as datetimes, for later conversion to dates
 #'
-#' DuckDB doesn't support using [lubridate::as_date()], so this is
-#' a simple wrapper around [as.Date()] with the correct formats.
+#' DuckDB doesn't support using [lubridate::as_date()], so this
+#' uses [dbplyr::sql()] to directly use DuckDB's `strptime` to
+#' convert strings to datetimes. Afterwards, it can be converted
+#' to dates.
 #'
-#' @param x A character (or date) column.
+#' @param x The name of a character (or date) column, given as a quoted string.
 #'
-#' @returns A Date column.
+#' @returns A Datetime column.
 #' @keywords internal
-as_date <- function(x) {
-  as.Date(x, tryFormats = c("%Y%m%d", "%Y-%m-%d"))
+as_sql_datetime <- function(x) {
+  dbplyr::sql(glue::glue("strptime({x}, ['%Y%m%d', '%Y-%m-%d'])"))
 }
@@ -24,8 +24,10 @@ drop_pcos <- function(gld_purchases, bef) {
   gld_purchases |>
     dplyr::inner_join(bef, by = dplyr::join_by("pnr")) |>
     dplyr::mutate(
-      date = as_date(.data$date),
-      foed_dato = as_date(.data$foed_dato)
+      date = !!as_sql_datetime("date"),
+      date = as.Date(.data$date),
+      foed_dato = !!as_sql_datetime("foed_dato"),
+      foed_dato = as.Date(.data$foed_dato)
     ) |>
     # Use !! to inject the expression into filter
     dplyr::filter(!!logic) |>
@@ -70,17 +72,6 @@ drop_pregnancies <- function(
 ) {
   criteria <- logic_as_expression("is_within_pregnancy_interval")[[1]]
 
-  # TODO: This should be done at an earlier stage.
-  # Ensure both date columns are of type Date.
-  dropped_pcos <- dropped_pcos |>
-    dplyr::mutate(
-      date = as_date(.data$date)
-    )
-  included_hba1c <- included_hba1c |>
-    dplyr::mutate(
-      date = as_date(.data$date)
-    )
-
   dropped_pcos |>
     # Full join to keep rows from both dropped_pcos and included_hba1c.
     dplyr::full_join(included_hba1c, by = dplyr::join_by("pnr", "date")) |>
@@ -100,7 +91,7 @@ drop_pregnancies <- function(
     # inside another for the same pnr.
     # Only keep rows that don't fall within any pregnancy interval.
     dplyr::filter(
-      !any(.data$is_within_pregnancy_interval),
+      !any(.data$is_within_pregnancy_interval, na.rm = TRUE),
       .by = c("pnr", "date")
     ) |>
     # Drop columns that were only used here.
 
@@ -1,7 +1,7 @@
 #' Create a synthetic dataset of edge case inputs
 #'
 #' @description
-#' This function generates a list of DuckDB tibbles representing the Danish health
+#' This function generates a list of tibbles representing the Danish health
 #' registers and the data necessary to run the algorithm. The dataset contains
 #' 23 individual cases (`pnr`s), each designed to test a specific logical branch
 #' of the diabetes classification algorithm, including inclusion, exclusion,
@@ -11,7 +11,7 @@
 #' behaves as expected under a wide range of conditions, but it is also intended
 #' to be explored by users to better understand how the algorithm logic works.
 #'
-#' @return A named list of 9  [duckplyr::duckdb_tibble()] objects, each representing a
+#' @return A named list of 9 [tibble::tibble()] objects, each representing a
 #'   different health register: `bef`, `lmdb`, `lpr_adm`, `lpr_diag`,
 #'   `kontakter`, `diagnoser`, `sysi`, `sssy`, and `lab_forsker`.
 #' @export
@@ -350,6 +350,5 @@ edge_cases <- function() {
       out <- rlang::set_names(out, name)
     }) |>
     purrr::flatten() |>
-    purrr::map(duckplyr::as_duckdb_tibble) |>
-    append(list(classified = duckplyr::as_duckdb_tibble(classified)))
+    append(list(classified = classified))
 }
@@ -42,7 +42,7 @@ join_inclusions <- function(
     dplyr::mutate(
       dplyr::across(
         dplyr::starts_with("has_"),
-        ~ dplyr::coalesce(any(.x), FALSE)
+        \(x) any(dplyr::coalesce(x, FALSE), na.rm = TRUE)
       ),
       .by = "pnr"
     )
Original file line number	Diff line number	Diff line change
`@@ -42,7 +42,7 @@ create_inclusion_dates <- function(`
`42`	`42`	`# Set the stable inclusion date to NA if the raw inclusion date is before`
`43`	`43`	`# stable_inclusion_start_date.`
`44`	`44`	`stable_inclusion_date = dplyr::if_else(`
`45`		`- .data$raw_inclusion_date < as_date(stable_inclusion_start_date),`
	`45`	`+ .data$raw_inclusion_date < as.Date(stable_inclusion_start_date),`
`46`	`46`	`NA,`
`47`	`47`	`.data$raw_inclusion_date`
`48`	`48`	`)`
Original file line number	Diff line number	Diff line change
`@@ -42,7 +42,7 @@ join_inclusions <- function(`
`42`	`42`	`dplyr::mutate(`
`43`	`43`	`dplyr::across(`
`44`	`44`	`dplyr::starts_with("has_"),`
`45`		`- ~ dplyr::coalesce(any(.x), FALSE)`
	`45`	`+ \(x) any(dplyr::coalesce(x, FALSE), na.rm = TRUE)`
`46`	`46`	`),`
`47`	`47`	`.by = "pnr"`
`48`	`48`	`)`