Skip to content

Commit 55a9c8a

Browse files
committed
v5
1 parent 9acb36b commit 55a9c8a

File tree

8 files changed

+194
-8
lines changed

8 files changed

+194
-8
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: DataQuality
22
Title: Data Quality study in OHDSI and data quality evaluation
3-
Version: 4.1
3+
Version: 5.0
44
Authors@R: "Vojtech Huser <vojtech.huser@nih.gov> [aut, cre]"
55
Description: This package has several functions. It supports Data Quality Dashboard and OHDSI Data Quality study.
66
It also allows Data Quality evaluation (even after closure of the formal study).

NAMESPACE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@ export(.createConnectionDetails2)
55
export(checkThemis)
66
export(createMIAD)
77
export(customMeasure)
8+
export(customMeasureSql)
89
export(dashboardLabThresholds)
10+
export(dashboardLabValueAsConceptID)
911
export(doTree)
1012
export(executeDQ)
1113
export(packageResults)

R/StudySpecific.R

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -613,3 +613,127 @@ dashboardLabThresholds <- function(connectionDetails,
613613
class(result) <- "connectionDetails2"
614614
return(result)
615615
}
616+
617+
618+
619+
#' Export the distribution of coded lab values (value_as_concept_id) per measurement
#'
#' Counts measurement records by measurement_concept_id and value_as_concept_id
#' (analysis 1822) with custom SQL (no dependency on Achilles), drops rows with
#' fewer than \code{exportThreshold} records, converts the remaining counts to a
#' per-measurement proportion and writes the result to
#' \code{LabCodedValueResults.csv} in the \code{export} subfolder of
#' \code{connectionDetails2$workFolder}.
#'
#' @param connectionDetails connection
#' @param connectionDetails2 more study parameters (workFolder and cdmDatabaseSchema are used here)
#' @param runViaAchilles flag for the legacy analysis using Achilles measures; that path is
#'   currently disabled, so the value is ignored (kept for interface compatibility)
#' @param exportThreshold removes rows that have fewer than specified events (default is 11)
#' @return \code{NULL}, invisibly; the function is called for its side effect (CSV export)
#' @export
dashboardLabValueAsConceptID <- function(connectionDetails,
                                         connectionDetails2,
                                         runViaAchilles = FALSE,
                                         exportThreshold = 11) {

  # create export subfolder in workFolder (recursive, in case workFolder itself is missing)
  exportFolder <- file.path(connectionDetails2$workFolder, "export")
  if (!dir.exists(exportFolder)) {
    dir.create(exportFolder, recursive = TRUE)
  }

  # NOTE: the legacy Achilles-based path (analyses 1807 and 1815) was hard-disabled
  # with `if (FALSE)` and never executed; it is omitted here. runViaAchilles is
  # therefore intentionally unused.

  # execution via custom SQL (not to have dependency on Achilles)
  # -- 1822 Number of measurement records, by measurement_concept_id and value_as_concept_id
  sql <- "
    select
      1822 AS analysis_id,
      measurement_concept_id,
      value_as_concept_id,
      count_big(*) AS count_value
    from @cdmDatabaseSchema.measurement
    group by measurement_concept_id, value_as_concept_id;
  "
  counts <- customMeasureSql(connectionDetails = connectionDetails,
                             connectionDetails2 = connectionDetails2,
                             sql = sql)
  names(counts) <- tolower(names(counts))

  # small-cell suppression: keep only rows with at least exportThreshold records
  counts <- dplyr::filter(counts, count_value >= exportThreshold)

  # disable scientific notation for the export only; restore the caller's options on exit
  oldOptions <- options(scipen = 999)
  on.exit(options(oldOptions), add = TRUE)

  # convert counts to proportions within each measurement_concept_id; the
  # denominator is the sum over rows that survived the threshold filter above.
  # All dplyr verbs are namespace-qualified so the function works without
  # dplyr being attached.
  shares <- dplyr::group_by(counts, measurement_concept_id)
  shares <- dplyr::mutate(shares, perc = count_value / sum(count_value))
  shares <- dplyr::ungroup(shares)
  shares <- dplyr::arrange(shares, measurement_concept_id, dplyr::desc(perc))
  shares <- dplyr::select(shares, -count_value)

  writeLines(paste("-----count of measurements with coded value:", nrow(counts)))

  writeLines(paste("--writing output file LabCodedValueResults to export folder:", exportFolder))
  write.csv(shares,
            file = file.path(exportFolder, "LabCodedValueResults.csv"),
            row.names = FALSE)

  # final cleanup
  writeLines("--Done with dashboardLabValueAsConceptID")
  invisible(NULL)
}
716+
717+
718+
#' Run a custom SQL measure against the CDM
#'
#' Renders the \code{@cdmDatabaseSchema} parameter in \code{sql}, translates the
#' statement to the dialect given by \code{connectionDetails$dbms}, executes it
#' and returns the query result.
#'
#' @param connectionDetails connection
#' @param connectionDetails2 more study parameters (cdmDatabaseSchema is used here)
#' @param sql sql code to run
#' @return the data returned by \code{DatabaseConnector::querySql} for the rendered query
#' @export
customMeasureSql <- function(connectionDetails,
                             connectionDetails2,
                             sql) {

  sql <- SqlRender::render(sql, cdmDatabaseSchema = connectionDetails2$cdmDatabaseSchema)
  sql <- SqlRender::translate(sql, targetDialect = connectionDetails$dbms)

  conn <- DatabaseConnector::connect(connectionDetails)
  # register the disconnect immediately so the connection is released even
  # when the query fails
  on.exit(DatabaseConnector::dbDisconnect(conn), add = TRUE)

  DatabaseConnector::querySql(conn, sql)
}

README.md

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,20 @@ This is an R package that has sume utilities and also supports informatics stud
55

66
# Support development of Data Quality Dashboard (DQD)
77

8-
Package has functions that support addition of more rules and knowledge base items to the Data Quality Dashboard. For example, we want to compute 3rd and 97th percentile (or other parameters) for some lab results to use those as thresholds.
8+
Package has functions that support addition of more rules and knowledge base items to the Data Quality Dashboard. For example, we want to compute 3rd and 97th percentile (or other parameters) for some lab results to use those as thresholds. We also analyze which value_as_concept_ids are used with specific measurement_concept_ids to help the OHDSI network standardize value_as_concept_id for critical measurements.
99

1010
A site can participate in developing the knowledge base for DQD, or it can take part in the formal research study (to be published). See the [extras/protocol](extras/protocol) folder for the protocol.
1111

1212
To run the study, see DQD section in file [extras/CodeToRun.R](extras/CodeToRun.R)
1313

1414

15+
# Versions
16+
17+
## v4.1
18+
Simplified release for DQD purpose of the package
19+
20+
21+
## v5.0
22+
Addition of functionality to analyze value_as_concept_ids
23+
24+

extras/CentralProcessingDQDThresholds.R

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,11 @@ llmoded<-map(ll,~{names(.x)<-tolower(names(.x));return(.x)})
4141
ll2<-map2(llmoded,basename(sfiles),~mutate(.x,site=.y))
4242
d<-bind_rows(ll2)
4343

44+
45+
#---end of reading input files
46+
4447
#add terminology concepts
45-
sconcept<-concept %>% select(concept_id,concept_name)
48+
sconcept<-concept %>% select(concept_id,concept_name,concept_code)
4649
names(d) <- tolower(names(d))
4750
names(d)
4851
#remove no units rows and expand the CIDs
@@ -52,8 +55,8 @@ names(d)
5255

5356

5457
d2<-d %>% filter(count_value >=11 ) %>% filter(stratum1_id != 0) %>% filter(stratum2_id != 0) %>%
55-
left_join(sconcept,by=c('stratum1_id'='concept_id')) %>%
56-
left_join(sconcept,by=c('stratum2_id'='concept_id')) %>% filter(!is.na(concept_name.x))
58+
left_join(concept %>% select(concept_id,concept_name,concept_code),by=c('stratum1_id'='concept_id')) %>%
59+
left_join(concept %>% select(concept_id,concept_name),by=c('stratum2_id'='concept_id')) %>% filter(!is.na(concept_name.x))
5760
#test in 2B range are excluded by last filter
5861

5962
names(d2)
@@ -66,7 +69,7 @@ soverview
6669

6770

6871

69-
ba<-d2 %>% group_by(stratum1_id,stratum2_id,concept_name.x,concept_name.y) %>% summarize(tcnt=sum(count_value),n=n())
72+
ba<-d2 %>% group_by(stratum1_id,stratum2_id,concept_name.x,concept_name.y,concept_code) %>% summarize(tcnt=sum(count_value),n=n())
7073
ba %>% filter(n>=2) %>% nrow()
7174
nrow(ba)
7275
#4465 distinct test-unit pairs
@@ -179,7 +182,8 @@ viewSites
179182
#bc<-ddriven %>% inner_join(bset)
180183
#3009542 hematocrit
181184
options(scipen=999) #disable scientific notation
182-
prekb<-ddriven %>% group_by(conceptId,concept_name.x,unitConceptId,concept_name.y) %>% summarize(
185+
names(ddriven)
186+
prekb<-ddriven %>% group_by(conceptId,concept_name.x,unitConceptId,concept_name.y,concept_code) %>% summarize(
183187
n=n()
184188
,sum_count_value=sum(count_value)
185189
#,kb_min_mean=mean(min_value)

extras/CodeToRun.R

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,11 @@ connectionDetails2<-DataQuality:::.createConnectionDetails2(cdmDatabaseSchema =
4747

4848
DataQuality::dashboardLabThresholds(connectionDetails = connectionDetails,connectionDetails2 = connectionDetails2)
4949

50+
#second component added in May 2020
51+
DataQuality::dashboardLabValueAsConceptID(connectionDetails = connectionDetails,connectionDetails2 = connectionDetails2)
52+
5053
#results are in workfolder, subfolder export, inspect it and
51-
#email the CSV file called Thresholds to the study PI
54+
#email the CSV files called Thresholds and LabCodedValueResults to the study PI
5255

5356
#for simplicity, the package is not using submission of the results via OHDSI AWS infrastructure
5457
#(if requested by site, the code can be provided, though)

man/customMeasureSql.Rd

Lines changed: 18 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/dashboardLabValueAsConceptID.Rd

Lines changed: 25 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)