Skip to content

Commit 3a0d1d1

Browse files
authored
Merge pull request #14 from SivamshIndukuri/treatment_patterns
Treatment patterns
2 parents 516a721 + 2ac3696 commit 3a0d1d1

File tree

10 files changed

+572
-2
lines changed

10 files changed

+572
-2
lines changed

NAMESPACE

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ export(generateFullReport)
77
export(generatePresentation)
88
export(generatePresentationMultiple)
99
export(generateSummaryPredictionReport)
10+
export(getAnalysisCohorts)
1011
export(getBinaryCaseSeries)
1112
export(getBinaryRiskFactors)
1213
export(getCMEstimation)
@@ -41,6 +42,7 @@ export(getContinuousRiskFactors)
4142
export(getDatabaseDetails)
4243
export(getDechallengeRechallenge)
4344
export(getDechallengeRechallengeFails)
45+
export(getEventDuration)
4446
export(getExampleConnectionDetails)
4547
export(getFullPredictionPerformances)
4648
export(getIncidenceOutcomes)
@@ -75,6 +77,7 @@ export(getTargetBinaryFeatures)
7577
export(getTargetContinuousFeatures)
7678
export(getTargetTable)
7779
export(getTimeToEvent)
80+
export(getTreatmentPathways)
7881
export(kableDark)
7982
export(plotAgeDistributions)
8083
export(plotCmEstimates)

R/TreatmentPatternsQueries.R

Lines changed: 296 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,296 @@
1+
#' Extracts the different analyses run for each target and its event cohorts
#' @description
#' This function extracts the analysis ids, event cohorts, exit cohorts and databases per target from the treatment patterns results
#'
#' @details
#' Specify the connectionHandler and the schema
#'
#' @template connectionHandler
#' @template schema
#' @template tpTablePrefix
#' @family Treatment Patterns
#' @return
#' Returns a data.frame with the columns:
#' \itemize{
#'  \item{databaseName a concatenated string of all the database names run for that analysis}
#'  \item{databaseId a concatenated string of all the database ids run for that analysis}
#'  \item{analysisId the analysis id of the treatment patterns run}
#'  \item{targetCohortName the target cohort name}
#'  \item{targetCohortId the target cohort unique identifier}
#'  \item{eventCohortList a concatenated string of all the event cohort names run for that analysis}
#'  \item{exitCohortList a concatenated string of all the exit cohort names run for that analysis}
#' }
#'
#' @export
#'
#' @examples
#' conDet <- getExampleConnectionDetails()
#'
#' connectionHandler <- ResultModelManager::ConnectionHandler$new(conDet)
#'
#' cohortAnalysis <- getAnalysisCohorts(
#'   connectionHandler = connectionHandler,
#'   schema = "main"
#' )
getAnalysisCohorts <- function(
    connectionHandler,
    schema,
    tpTablePrefix = "tp_"
) {
  # One row per (analysis, cohort, database); `type` is compared against
  # "target", "event" and "exit" below.
  sql <- "SELECT
  d.CDM_SOURCE_ABBREVIATION AS database_name,
  d.database_id AS database_id,
  t.analysis_id,
  t.cohort_name,
  t.cohort_id,
  t.type
  FROM @schema.@tp_table_prefixanalysis_cohorts t
  Inner JOIN @schema.@tp_table_prefixcdm_source_info d
  ON t.analysis_id = d.analysis_id"

  # No trailing comma after the last argument: an empty argument would be
  # forwarded to queryDb and only works if the callee happens to absorb it.
  result <- connectionHandler$queryDb(
    sql = sql,
    schema = schema,
    tp_table_prefix = tpTablePrefix
  )

  # The code below refers to camelCase column names (analysisId, cohortId, ...)
  # NOTE(review): presumably queryDb converts snake_case to camelCase - confirm.

  # One row per analysis/target cohort pair.
  targets <- result %>%
    dplyr::filter(type == "target") %>%
    dplyr::distinct(analysisId, cohortId, .keep_all = TRUE) %>%
    dplyr::select(analysisId, targetCohortId = cohortId, targetCohortName = cohortName)

  # Event cohort names collapsed into one string per analysis.
  events <- result %>%
    dplyr::filter(type == "event") %>%
    dplyr::distinct(analysisId, cohortId, .keep_all = TRUE) %>%
    dplyr::group_by(analysisId) %>%
    dplyr::summarise(eventCohortList = paste(cohortName, collapse = ", "), .groups = "drop")

  # Exit cohort names collapsed into one string per analysis.
  exits <- result %>%
    dplyr::filter(type == "exit") %>%
    dplyr::distinct(analysisId, cohortId, .keep_all = TRUE) %>%
    dplyr::group_by(analysisId) %>%
    dplyr::summarise(exitCohortList = paste(cohortName, collapse = ", "), .groups = "drop")

  # Database ids and names collapsed into one string each per analysis.
  databases <- result %>%
    dplyr::distinct(analysisId, databaseId, .keep_all = TRUE) %>%
    dplyr::group_by(analysisId) %>%
    dplyr::summarise(
      databaseId = paste(databaseId, collapse = ", "),
      databaseName = paste(databaseName, collapse = ", "),
      .groups = "drop"
    )

  # Left joins keep every target even when an analysis has no event or exit
  # cohorts (those list columns are then NA).
  final <- targets %>%
    dplyr::left_join(events, by = "analysisId") %>%
    dplyr::left_join(exits, by = "analysisId") %>%
    dplyr::left_join(databases, by = "analysisId")

  return(final)
}
90+
91+
92+
#' Extracts treatment pathways
#' @description
#' This function extracts the pathway results for specified analysis ids and target cohorts
#'
#' @details
#' Specify the connectionHandler and the schema
#'
#' @template connectionHandler
#' @template schema
#' @template tpTablePrefix
#' @template databaseTable
#' @param age (optional) A string representing an age bucket to restrict to (e.g., "0-17", "18-34", "65+"); the default is "all"
#' @param sex (optional) A string "male" or "female" to restrict to; the default is "all"
#' @param indexYear (optional) A string with a four-digit year to restrict to; the default is "all"
#' @param analysisIds (optional) A vector of analysis ids to restrict to
#' @param databaseIds (optional) A vector of database ids to restrict to
#' @param databaseNames (optional) A vector of database names to restrict to
#' @param targetIds (optional) A vector of target cohort ids to restrict to
#' @family Treatment Patterns
#' @return
#' Returns a data.frame with the columns:
#' \itemize{
#'  \item{databaseName the name of the database}
#'  \item{databaseId the unique identifier of the database}
#'  \item{analysisId the unique identifier of a treatment patterns run}
#'  \item{targetCohortName the target cohort name}
#'  \item{targetCohortId the target cohort unique identifier}
#'  \item{pathway a string representing the progression of events for a target. Uses '-' to separate sequential steps and '+' for a combination of events at that step}
#'  \item{freq the count of pathway occurrences}
#'  \item{age the age stratum of the pathway}
#'  \item{indexYear the index year stratum of the pathway}
#'  \item{sex the sex stratum of the pathway}
#' }
#'
#' @export
#'
#' @examples
#' conDet <- getExampleConnectionDetails()
#'
#' connectionHandler <- ResultModelManager::ConnectionHandler$new(conDet)
#'
#' pathways <- getTreatmentPathways(
#'   connectionHandler = connectionHandler,
#'   schema = "main"
#' )
getTreatmentPathways <- function(
    connectionHandler,
    schema,
    tpTablePrefix = "tp_",
    databaseTable = "database_meta_data",
    age = "all",
    sex = "all",
    indexYear = "all",
    analysisIds = NULL,
    databaseIds = NULL,
    databaseNames = NULL,
    targetIds = NULL
) {
  # age, index_year and sex are always filtered; each {@use_*}?{...} clause is
  # driven by a use_* flag that is TRUE only when its filter is not NULL.
  sql <- "SELECT
  d.CDM_SOURCE_ABBREVIATION AS database_name,
  t.database_id,
  t.analysis_id,
  t.target_cohort_name,
  t.target_cohort_id,
  t.freq,
  t.pathway,
  t.age,
  t.index_year,
  t.sex
  FROM @schema.@tp_table_prefixtreatment_pathways t
  INNER JOIN
  @schema.@database_table d
  ON t.database_id = d.database_id
  WHERE
  t.age = @age AND
  t.index_year = @index_year AND
  t.sex = @sex
  {@use_targets}?{AND t.target_cohort_id IN (@target_ids) }
  {@use_database_id}?{AND t.database_id IN (@database_id) }
  {@use_analysis}?{and t.analysis_id IN (@analysis_ids)}
  {@use_database_name}?{and d.CDM_SOURCE_ABBREVIATION IN (@database_name)}
  "

  # Numeric id vectors are joined with commas; character values are wrapped in
  # single quotes so they render as SQL string literals.
  result <- connectionHandler$queryDb(
    sql = sql,
    schema = schema,
    tp_table_prefix = tpTablePrefix,
    database_table = databaseTable,
    use_targets = !is.null(targetIds),
    target_ids = paste0(targetIds, collapse = ","),
    use_analysis = !is.null(analysisIds),
    analysis_ids = paste0(analysisIds, collapse = ","),
    use_database_id = !is.null(databaseIds),
    database_id = paste0("'", databaseIds, "'", collapse = ","),
    use_database_name = !is.null(databaseNames),
    database_name = paste0("'", databaseNames, "'", collapse = ","),
    age = paste0("'", age, "'"),
    sex = paste0("'", sex, "'"),
    index_year = paste0("'", indexYear, "'")
  )

  return(result)
}
196+
197+
#' Extracts summary of event duration
#' @description
#' This function extracts summary statistics of event duration for specified analysis ids and target cohorts
#'
#' @details
#' Specify the connectionHandler, the schema, and the analysisIds
#'
#' @template connectionHandler
#' @template schema
#' @template tpTablePrefix
#' @template databaseTable
#' @param analysisIds A vector of analysis ids to restrict to (must contain at least one id)
#' @param databaseIds (optional) A vector of database ids to restrict to
#' @param databaseNames (optional) A vector of database names to restrict to
#' @param targetIds (optional) A vector of target cohort ids to restrict to
#' @family Treatment Patterns
#' @return
#' Returns a data.frame with the columns:
#' \itemize{
#'  \item{databaseName the name of the database}
#'  \item{databaseId the unique identifier of the database}
#'  \item{analysisId the unique identifier of a treatment patterns run}
#'  \item{targetCohortId the target cohort unique identifier}
#'  \item{targetCohortName the target cohort name}
#'  \item{eventName a string representing an event for a target. Uses '+' for a combination of event cohorts}
#'  \item{rank the step number at which the event occurs}
#'  \item{eventCount the count of event occurrences at that rank}
#'  \item{durationAverage the average duration of event}
#'  \item{durationMax the maximum duration of event}
#'  \item{durationMin the minimum duration of event}
#'  \item{durationMedian the median duration of event}
#'  \item{p25Value the 25th percentile for duration of event}
#'  \item{p75Value the 75th percentile for duration of event}
#'  \item{standardDeviation the standard deviation for duration of event}
#' }
#'
#' @export
#'
#' @examples
#' conDet <- getExampleConnectionDetails()
#'
#' connectionHandler <- ResultModelManager::ConnectionHandler$new(conDet)
#'
#' eventDuration <- getEventDuration(
#'   connectionHandler = connectionHandler,
#'   schema = "main",
#'   analysisIds = c(1)
#' )
getEventDuration <- function(
    connectionHandler,
    schema,
    analysisIds,
    tpTablePrefix = "tp_",
    databaseTable = "database_meta_data",
    databaseIds = NULL,
    databaseNames = NULL,
    targetIds = NULL
) {
  # The analysis filter is unconditional in the SQL, so an empty vector would
  # render as `IN ()`; fail early with a clear message instead.
  if (length(analysisIds) == 0) {
    stop("analysisIds must contain at least one analysis id", call. = FALSE)
  }

  # `line` is exposed as rank; duration_q_1 / duration_q_2 are exposed as the
  # 25th / 75th percentile columns.
  # NOTE(review): confirm duration_q_2 holds the 75th percentile in the
  # treatment patterns results schema.
  sql <- "SELECT
  d.CDM_SOURCE_ABBREVIATION AS database_name,
  t.database_id,
  t.analysis_id,
  t.target_cohort_id,
  t.target_cohort_name,
  t.event_name,
  t.line AS rank,
  t.event_count,
  t.duration_average,
  t.duration_max,
  t.duration_min,
  t.duration_median,
  t.duration_q_1 as p_25_value,
  t.duration_q_2 as p_75_value,
  t.duration_sd as standard_deviation
  FROM @schema.@tp_table_prefixsummary_event_duration t
  INNER JOIN
  @schema.@database_table d
  ON t.database_id = d.database_id
  WHERE t.analysis_id IN (@analysis_ids)
  {@use_targets}?{AND t.target_cohort_id IN (@target_ids) }
  {@use_database_id}?{AND t.database_id IN (@database_id) }
  {@use_database_name}?{and d.CDM_SOURCE_ABBREVIATION IN (@database_name)}
  "

  # No trailing comma after the last argument: an empty argument would be
  # forwarded to queryDb and only works if the callee happens to absorb it.
  result <- connectionHandler$queryDb(
    sql = sql,
    schema = schema,
    tp_table_prefix = tpTablePrefix,
    database_table = databaseTable,
    use_targets = !is.null(targetIds),
    target_ids = paste0(targetIds, collapse = ","),
    analysis_ids = paste0(analysisIds, collapse = ","),
    use_database_id = !is.null(databaseIds),
    database_id = paste0("'", databaseIds, "'", collapse = ","),
    use_database_name = !is.null(databaseNames),
    database_name = paste0("'", databaseNames, "'", collapse = ",")
  )

  return(result)
}

extras/createExampleData.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ Strategus::execute(
2828
executionSettings = Strategus::createCdmExecutionSettings(
2929
workDatabaseSchema = 'main',
3030
cdmDatabaseSchema = cdmDatabase,
31-
cohortTableNames = CohortGenerator::getCohortTableNames('cohort'),
31+
cohortTableNames = CohortGenerator::getCohortTableNames('cohort_table'),
3232
workFolder = file.path(outputFolder, 'work'),
3333
resultsFolder = file.path(outputFolder, 'result')#,
3434
#modulesToExecute = c('CohortGeneratorModule','SelfControlledCaseSeriesModule')
-33.5 KB
Binary file not shown.

man-roxygen/tpTablePrefix.R

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
#' @param tpTablePrefix The prefix used for the treatment patterns results tables

man/createPredictionReport.Rd

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/getAnalysisCohorts.Rd

Lines changed: 49 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)