Skip to content

Commit cddfa03

Browse files
committed
update
1 parent 289cd92 commit cddfa03

16 files changed

+117
-430
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ Imports:
1818
License: Apache License 2.0
1919
Encoding: UTF-8
2020
LazyData: true
21-
RoxygenNote: 6.1.1
21+
RoxygenNote: 7.0.2
2222
Suggests: knitr,
2323
rmarkdown
2424
VignetteBuilder: knitr

R/StudySpecific.R

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -509,6 +509,7 @@ GROUP BY o.stratum1_id, o.stratum2_id, o.total, o.min_value, o.max_value, o.avg_
509509
#' Execute all components of the DataQuality study (resultsDatabaseSchema is where Achilles results are)
510510
#' @param connectionDetails connection
511511
#' @param connectionDetails2 more study parameters
512+
#' @param runViaAchilles flag that runs legacy analysis using Achilles measure (analysis)
512513

513514
#' @export
514515
dashboardLabThresholds <- function(connectionDetails,

R/TestCode.R

Lines changed: 0 additions & 32 deletions
This file was deleted.

README.md

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,23 @@
22

33
This is an R package that has some utilities and also supports informatics studies that focus on data quality (rather than a clinical question). It may also support development of other tools.
44

5+
6+
# Support development of Data Quality Dashboard (DQD)
7+
8+
Package has functions that support addition of more rules and knowledge base items to the Data Quality Dashboard. For example, we want to compute 3rd and 97th percentile (or other parameters) for some lab results to use those as thresholds.
9+
10+
A site can participate in developing the knowledge base for DQD. Or it can take part in a formal research study (to be published). See the [extras/protocol](extras/protocol) folder for the protocol.
11+
12+
To run the study, see DQD section in file [extras/CodeToRun.R](extras/CodeToRun.R)
13+
14+
15+
No need to read further if you only need to run the DQD study.
16+
17+
18+
19+
20+
# Additional info (not needed for DQD study)
21+
522
For the study, the protocol is available in extras folder.
623

724
Forum link: http://forums.ohdsi.org/t/ohdsi-informatics-study-data-quality/1857/2
@@ -27,16 +44,6 @@ There are several usage scenarios.
2744
- adding MIAD
2845

2946

30-
# Support development of Data Quality Dashboard
31-
32-
Package has functions that support addition of more rules and knowledge base items to the Data Quality Dashboard. For example, we want to compute 3rd and 97th percentile (or other parameters) for some lab results to use those as thresholds.
33-
34-
This is related to unit analysis done by https://github.com/vojtechhuser/ThemisConcepts
35-
36-
A site can participate on developing knowledbe base for DQD. Or it can take part on format research study (to be published). Se [extras/protocol](extras/protocol) folder for protocol.
37-
38-
To run the study, see DQD section in file [extras/TestCode.R](extras/TestCode.R)
39-
4047

4148

4249
# Generate MIAD (minimum information about a dataset)

extras/CentralProcessing.R

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -46,14 +46,17 @@ load('o:/athena/concept.rda')
4646

4747
#reading a single site data (for now)
4848
f<-'d:/OneDrive - National Institutes of Health/temp/dqd/export'
49+
f<-'d:/OneDrive - National Institutes of Health/ohdsi/thresholds'
4950

5051
sfiles<-c(file.path(f,'1ThresholdsA.csv'))
51-
sfiles<-c(file.path(f,'1ThresholdsA.csv'),file.path(f,'ThresholdsA.csv'))
52+
sfiles<-c(file.path(f,'test-ThresholdsA.csv'))
53+
sfiles<-c(file.path(f,'1ThresholdsA.csv'),file.path(f,'ThresholdsA.csv'),file.path(f,'test-ThresholdsA.csv'))
5254
ll<-map(sfiles,read_csv)
5355
ll
5456

5557
#ll<-map(p$pid,doProperty())
56-
ll2<-map2(ll,sfiles,~mutate(.x,site=.y))
58+
#strip name from full path trick
59+
ll2<-map2(ll,basename(sfiles),~mutate(.x,site=.y))
5760
d<-bind_rows(ll2)
5861

5962
#add terminology concepts
@@ -66,13 +69,20 @@ d2<-d %>% filter(stratum_1 != 0) %>% filter(stratum_2 != 0) %>% left_join(sconce
6669
names(d2)
6770

6871
#remove columns that are not needed
69-
d3<-d2 %>% select(-stratum_3,-stratum_4,-stratum_5,-p25_value,-p75_value) %>%
70-
filter(count_value >=100 ) %>% arrange(stratum_1,desc(count_value) )
72+
# d3<-d2 %>% select(-stratum_3,-stratum_4,-stratum_5,-p25_value,-p75_value) %>%
73+
# filter(count_value >=100 ) %>% arrange(stratum_1,desc(count_value) )
74+
75+
d3<-d2 %>% select(-stratum_3,-stratum_4,-stratum_5) %>%
76+
arrange(stratum_1,desc(count_value) )
77+
7178

7279
d3 %>% count(site)
73-
ba<-d3 %>% count(stratum_1,stratum_2)
80+
names(d3)
81+
ba<-d3 %>% group_by(stratum_1,stratum_2,concept_name.x,concept_name.y) %>% summarize(tcnt=sum(count_value),n=n())
7482
ba %>% filter(n>=2)
7583

84+
85+
7686
#24 test-unit pairs have 2 results
7787

7888

@@ -149,3 +159,17 @@ over %>% select(conceptName,unitConceptName,plausibleValueHigh,max_value)
149159

150160
#5g/dL into mg/dL (is 5000 mg/dL)
151161
#in data is in fact unit/L
162+
163+
164+
165+
#unitmorph
166+
#Protein [Mass/volume] in Serum or Plasma 7096851 4 gram per deciliter|unit|milligram per deciliter|gram per liter
167+
# gram per deciliter| |milligram per deciliter | gram per liter
168+
names(d3)
169+
bb<-d3 %>% filter(site=='ThresholdsA.csv') %>% group_by(stratum_1,concept_name.x) %>%
170+
summarize(tcnt=sum(count_value)
171+
,n=n(),units=paste(concept_name.y,collapse = '|')
172+
,cnts=paste(count_value ,collapse = '|')
173+
,unitcids=paste(stratum_2,collapse = '|')
174+
)
175+
bb %>% write_csv('local/morphA.csv')
Lines changed: 14 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,61 +1,42 @@
1-
#' @file TestCode.R
2-
3-
4-
library(DataQuality)
5-
6-
workFolder <- 'c:/temp'
7-
8-
#get connection details
9-
source('c:/r/conn.R') #
10-
11-
#database parameters
12-
cdmDatabaseSchema <-'ccae'
13-
resultsDatabaseSchema <-'ccae' #at most sites this likely will not be the same as cdmDatabaseSchema
14-
15-
workFolder <- 'c:/temp' #this folder must exist (use forward slashes)
16-
17-
18-
executeDQ(connectionDetails = connectionDetails,cdmDatabaseSchema = cdmDatabaseSchema,workFolder = workFolder)
19-
cd2<-.createConnectionDetails2(cdmDatabaseSchema = cdmDatabaseSchema)
20-
21-
22-
#DatabaseConnector::createConnectionDetails()
23-
#packageResults(connectionDetails,cdmDatabaseSchema,workFolder)
24-
25-
submitResults(exportFolder =file.path(workFolder,'export'),
26-
studyBucketName = 'ohdsi-study-dataquality',
27-
key=studyKey,
28-
secret =studySecret
29-
)
1+
#' @file CodeToRun.R
302

313

324
#DQD section-----------------------------------------------
335
#----------------------------------------------------------
346
#----------------------------------------------------------
7+
8+
9+
#---LOAD THE R PACKAGE
3510
#uncomment the line below to install the latest version of the code and package
3611
#devtools::install_github("vojtechhuser/DataQuality")
37-
#testing it on Eunomia
12+
13+
14+
15+
#load the package
3816
library(DataQuality)
3917

4018

19+
#---SPECIFY YOUR LOCAL PARAMETERS BELOW-----
4120
#replace with your local parameters using the same variable names
4221
#note: it will run some achilles measures (so it may overwrite your current achilles results table)
43-
library(Eunomia)
4422
connectionDetails<-Eunomia::getEunomiaConnectionDetails()
4523
cdmDatabaseSchema <-'main'
4624
resultsDatabaseSchema <-'main' #at most sites this likely will not be the same as cdmDatabaseSchema
4725
workFolder <- 'c:/temp/dqd' #this folder must exist (use forward slashes)
4826

49-
#just a helper construct to package all study details into one object (similar as done with database details)
27+
#just a helper construct to package all study details into one object (similar as done with database connection details)
5028
connectionDetails2<-DataQuality:::.createConnectionDetails2(cdmDatabaseSchema = cdmDatabaseSchema
5129
,resultsDatabaseSchema = resultsDatabaseSchema
5230
,workFolder = workFolder
5331
)
5432

33+
34+
#----EXECUTE THE DQD THRESHOLD STUDY
5535
#execute the core of the DQD helper analysis, output will be written as a CSV file into an export folder
5636

5737
DataQuality::dashboardLabThresholds(connectionDetails = connectionDetails,connectionDetails2 = connectionDetails2)
5838

59-
#results will be in workfolder, subfolder export (will be zipped once we have more output)
39+
#results are in workfolder, subfolder export, inspect it and
40+
#email the CSV file Thresholds to the study PI
6041

6142

0 commit comments

Comments
 (0)