Skip to content

Commit 1bd330b

Browse files
committed
network study code
1 parent 7af76c8 commit 1bd330b

File tree

2 files changed

+188
-135
lines changed

2 files changed

+188
-135
lines changed

.Rhistory

Lines changed: 134 additions & 134 deletions
Original file line numberDiff line numberDiff line change
@@ -1,137 +1,3 @@
1-
return(result)
2-
}
3-
#' more details
4-
#' @export
5-
.createConnectionDetails2<-function (cdmDatabaseSchema) {
6-
result <- list()
7-
for (name in names(formals(createConnectionDetails))) {
8-
result[[name]] <- get(name)
9-
}
10-
values <- lapply(as.list(match.call())[-1], function(x) eval(x,
11-
envir = sys.frame(-3)))
12-
for (name in names(values)) {
13-
if (name %in% names(result))
14-
result[[name]] <- values[[name]]
15-
}
16-
class(result) <- "connectionDetails2"
17-
return(result)
18-
}
19-
cd2<-.createConnectionDetails2(cdmDatabaseSchema = cdmDatabaseSchema)
20-
#' more details
21-
#' @export
22-
.createConnectionDetails2<-function (cdmDatabaseSchema) {
23-
result <- list()
24-
for (name in names(formals(createConnectionDetails2))) {
25-
result[[name]] <- get(name)
26-
}
27-
values <- lapply(as.list(match.call())[-1], function(x) eval(x,
28-
envir = sys.frame(-3)))
29-
for (name in names(values)) {
30-
if (name %in% names(result))
31-
result[[name]] <- values[[name]]
32-
}
33-
class(result) <- "connectionDetails2"
34-
return(result)
35-
}
36-
cd2<-.createConnectionDetails2(cdmDatabaseSchema = cdmDatabaseSchema)
37-
#' more details
38-
#' @export
39-
.createConnectionDetails2<-function (cdmDatabaseSchema) {
40-
result <- list()
41-
for (name in names(formals(.createConnectionDetails2))) {
42-
result[[name]] <- get(name)
43-
}
44-
values <- lapply(as.list(match.call())[-1], function(x) eval(x,
45-
envir = sys.frame(-3)))
46-
for (name in names(values)) {
47-
if (name %in% names(result))
48-
result[[name]] <- values[[name]]
49-
}
50-
class(result) <- "connectionDetails2"
51-
return(result)
52-
}
53-
cd2<-.createConnectionDetails2(cdmDatabaseSchema = cdmDatabaseSchema)
54-
cd2
55-
#' more details
56-
#' @export
57-
.createConnectionDetails2<-function (cdmDatabaseSchema,resultsDatabaseSchema=NULL,cdmVersion=NULL,cohortTable='cohort') {
58-
result <- list()
59-
for (name in names(formals(.createConnectionDetails2))) {
60-
result[[name]] <- get(name)
61-
}
62-
values <- lapply(as.list(match.call())[-1], function(x) eval(x,
63-
envir = sys.frame(-3)))
64-
for (name in names(values)) {
65-
if (name %in% names(result))
66-
result[[name]] <- values[[name]]
67-
}
68-
class(result) <- "connectionDetails2"
69-
return(result)
70-
}
71-
connectionDetails2<-.createConnectionDetails2(cdmDatabaseSchema = cdmDatabaseSchema,resultsDatabaseSchema = resultsDatabaseSchema)
72-
connectionDetails<-Eunomia::getEunomiaConnectionDetails()
73-
connectionDetails2<-.createConnectionDetails2(cdmDatabaseSchema = cdmDatabaseSchema,resultsDatabaseSchema = resultsDatabaseSchema)
74-
#' more details
75-
#' @export
76-
.createConnectionDetails2<-function (cdmDatabaseSchema,resultsDatabaseSchema=NULL,cdmVersion=NULL
77-
,cohortTable='cohort'
78-
,workFolder='c:/temp') {
79-
result <- list()
80-
for (name in names(formals(.createConnectionDetails2))) {
81-
result[[name]] <- get(name)
82-
}
83-
values <- lapply(as.list(match.call())[-1], function(x) eval(x,
84-
envir = sys.frame(-3)))
85-
for (name in names(values)) {
86-
if (name %in% names(result))
87-
result[[name]] <- values[[name]]
88-
}
89-
class(result) <- "connectionDetails2"
90-
return(result)
91-
}
92-
library(DataQuality)
93-
library(Eunomia)
94-
connectionDetails<-Eunomia::getEunomiaConnectionDetails()
95-
cdmDatabaseSchema <-'main'
96-
resultsDatabaseSchema <-'main' #at most sites this likely will not be the same as cdmDatabaseSchema
97-
workFolder <- 'c:/temp/dqd' #this folder must exist (use forward slashes)
98-
connectionDetails2<-.createConnectionDetails2(cdmDatabaseSchema = cdmDatabaseSchema
99-
,resultsDatabaseSchema = resultsDatabaseSchema)
100-
DataQuality::dashboardLabThresholds(connectionDetails = connectionDetails
101-
,connectionDetails2 = connectionDetails2)
102-
Achilles::achilles(connectionDetails = connectionDetails
103-
,cdmDatabaseSchema = connectionDetails2$cdmDatabaseSchema
104-
,resultsDatabaseSchema = connectionDetails2$resultsDatabaseSchema
105-
,analysisIds = c(1807))
106-
Achilles::achilles(connectionDetails = connectionDetails
107-
,cdmDatabaseSchema = connectionDetails2$cdmDatabaseSchema
108-
,resultsDatabaseSchema = connectionDetails2$resultsDatabaseSchema
109-
,analysisIds = c(1807)
110-
,runHeel = FALSE
111-
,createIndices = FALSE)
112-
units<-Achilles::fetchAchillesAnalysisResults(connectionDetails = connectionDetails,resultsDatabaseSchema = connectionDetails2$resultsDatabaseSchema
113-
,analysisId = 1807)
114-
units
115-
#tbd
116-
library(devtools)
117-
devtools::use_package('dplyr')
118-
#tbd
119-
library(devtools)
120-
devtools::use_package('dplyr')
121-
use_package('dplyr')
122-
names(units)
123-
names(units)
124-
units
125-
,analysisId = 1807)$analysisResults
126-
,analysisId = 1807)$analysisResults
127-
units<-units$analysisResults
128-
names(units)
129-
units2<-units$analysisResults
130-
units2<-units$analysisResults
131-
names(units2)
132-
names(units2) <- tolower(names(units2))
133-
units2<-units$analysisResults
134-
names(units2) <- tolower(names(units2))
1351
names(units2)
1362
units2<-units$analysisResults
1373
names(units2) <- tolower(names(units2))
@@ -510,3 +376,137 @@ d4 %>% write_csv('extras/DqdResults/thresholds-list-A.csv')
510376
d4<-d3 %>% select(-count_value,-median_value,-stdev_value,-avg_value,-site)
511377
d4 %>% write_csv('extras/DqdResults/thresholds-list-A.csv')
512378
nrow(d4)
379+
#read DD checks
380+
url='https://raw.githubusercontent.com/OHDSI/DataQualityDashboard/master/inst/csv/OMOP_CDMv5.3.1_Concept_Level.csv'
381+
dqd<-read_csv(url)
382+
#read DD checks
383+
library(tidyverse)
384+
#read DD checks
385+
library(stats);library(tidyverse);library(magrittr)
386+
message("\n*** Successfully loaded .Rprofile ***\n")
387+
url='https://raw.githubusercontent.com/OHDSI/DataQualityDashboard/master/inst/csv/OMOP_CDMv5.3.1_Concept_Level.csv'
388+
dqd<-read_csv(url)
389+
str(dqd)
390+
View(dqd)
391+
View(dqd)
392+
str(dqd)
393+
names(dqd)
394+
dqd %>% filter(cdmTableName=='MEASUREMENT')
395+
names(dqd)
396+
dqd %>% filter(cdmTableName=='MEASUREMENT' & cmdFieldName=='MEASUREMENT_CONCEPT_ID' )
397+
names(dqd)
398+
dqd %>% filter(cdmTableName=='MEASUREMENT' & cmdFieldName=='MEASUREMENT_CONCEPT_ID' )
399+
dqd %>% filter(cdmTableName=='MEASUREMENT' && cmdFieldName=='MEASUREMENT_CONCEPT_ID' )
400+
names(dqd)
401+
dqd %>% filter(cdmTableName=='MEASUREMENT' & cmdFieldName=='MEASUREMENT_CONCEPT_ID' )
402+
dqd %>% dplyr::filter(cdmTableName=='MEASUREMENT' & cmdFieldName=='MEASUREMENT_CONCEPT_ID' )
403+
dqd %>% dplyr::filter(cdmFieldName=='MEASUREMENT_CONCEPT_ID' )
404+
dqd %>% dplyr::filter(cdmFieldName=='MEASUREMENT_CONCEPT_ID' ) %>% nrow()
405+
dqd %>% count(cdmTableName)
406+
dqd %>% count(cdmTableName,cdmFieldName)
407+
#reading a single site data (for now)
408+
f<-'d:/OneDrive - National Institutes of Health/temp/dqd/export/'
409+
sfiles<-c(file.path(f,'1ThresholdsA.csv'))
410+
ll<-map(sfiles,read_csv)
411+
#reading a single site data (for now)
412+
f<-'d:/OneDrive - National Institutes of Health/temp/dqd/export'
413+
sfiles<-c(file.path(f,'1ThresholdsA.csv'))
414+
ll<-map(sfiles,read_csv)
415+
sfiles<-c(file.path(f,'1ThresholdsA.csv'))
416+
ll<-map(sfiles,read_csv)
417+
ll
418+
sfiles<-c(file.path(f,'1ThresholdsA.csv'),file.path(f,'ThresholdsA.csv'))
419+
ll<-map(sfiles,read_csv)
420+
ll
421+
#ll<-map(p$pid,doProperty())
422+
ll2<-map2(ll,sfiles,~mutate(.x,site=.y))
423+
d<-bind_rows(ll2)
424+
#compare data driven and expert drive sets
425+
d
426+
#compare data driven and expert drive sets
427+
d$STRATUM_1 %<>% as.integer()
428+
dqd$unitConceptId
429+
dqd$unitConceptId %<>% as.integer()
430+
expert <-dqd %>% dplyr::filter(cdmFieldName=='MEASUREMENT_CONCEPT_ID' )
431+
expert
432+
names(expert)
433+
ddriven<-d %>% rename(unitConceptId=STRATUM_2)
434+
exprt %>% inner_join(ddriven)
435+
expert %>% inner_join(ddriven)
436+
names(expert)
437+
ddriven<-d %>% rename(conceptId=STRATUM_1,unitConceptId=STRATUM_2)
438+
ddriven<-d %>% rename(conceptId=STRATUM_1,unitConceptId=STRATUM_2) %>% select(conceptId,unitConceptId) %>% distint()
439+
expert %>% inner_join(ddriven)
440+
expert %>% anti_join(ddriven)
441+
expert %>% inner_join(ddriven)
442+
over=expert %>% inner_join(ddriven) #58 overlapping
443+
View(over)
444+
over=expert %>% inner_join(ddriven) #58 overlapping
445+
ddriven<-d %>% rename(conceptId=STRATUM_1,unitConceptId=STRATUM_2) %>% select(conceptId,unitConceptId) %>% unique()
446+
over=expert %>% inner_join(ddriven) #58 overlapping
447+
View(over)
448+
sfiles<-c(file.path(f,'1ThresholdsA.csv'))
449+
ll<-map(sfiles,read_csv)
450+
ll
451+
#ll<-map(p$pid,doProperty())
452+
ll2<-map2(ll,sfiles,~mutate(.x,site=.y))
453+
d<-bind_rows(ll2)
454+
#compare data driven and expert drive sets
455+
d$STRATUM_1 %<>% as.integer()
456+
dqd$unitConceptId %<>% as.integer()
457+
expert <-dqd %>% dplyr::filter(cdmFieldName=='MEASUREMENT_CONCEPT_ID' )
458+
names(expert)
459+
ddriven<-d %>% rename(conceptId=STRATUM_1,unitConceptId=STRATUM_2)
460+
over=expert %>% inner_join(ddriven) #58 overlapping
461+
View(over)
462+
ddriven %>% anti_join(expert) #827 not in data
463+
not1<-ddriven %>% anti_join(expert) #49 not in expert
464+
View(not1)
465+
#remove no units rows and expand the CIDs
466+
d2<-d %>% filter(stratum_1 != 0) %>% filter(stratum_2 != 0) %>% left_join(sconcept,by=c('stratum_1'='concept_id')) %>%
467+
left_join(sconcept,by=c('stratum_2'='concept_id'))
468+
d<-bind_rows(ll2)
469+
#add terminology concepts
470+
sconcept<-concept %>% select(concept_id,concept_name)
471+
load('o:/athena/concept.rda')
472+
#add terminology concepts
473+
sconcept<-concept %>% select(concept_id,concept_name)
474+
names(d) <- tolower(names(d))
475+
names(d)
476+
#remove no units rows and expand the CIDs
477+
d2<-d %>% filter(stratum_1 != 0) %>% filter(stratum_2 != 0) %>% left_join(sconcept,by=c('stratum_1'='concept_id')) %>%
478+
left_join(sconcept,by=c('stratum_2'='concept_id'))
479+
names(d2)
480+
names(d2)
481+
ddriven %<>% filter(conceptId!=0)
482+
ddriven %<>% filter(unitConceptId!=0)
483+
ddriven<-d %>% rename(conceptId=STRATUM_1,unitConceptId=STRATUM_2)
484+
sfiles<-c(file.path(f,'1ThresholdsA.csv'))
485+
ll<-map(sfiles,read_csv)
486+
ll
487+
#ll<-map(p$pid,doProperty())
488+
ll2<-map2(ll,sfiles,~mutate(.x,site=.y))
489+
d<-bind_rows(ll2)
490+
names(d) <- tolower(names(d))
491+
#remove no units rows and expand the CIDs
492+
d2<-d %>% filter(stratum_1 != 0) %>% filter(stratum_2 != 0) %>% left_join(sconcept,by=c('stratum_1'='concept_id')) %>%
493+
left_join(sconcept,by=c('stratum_2'='concept_id'))
494+
names(d2)
495+
ddriven<-d2 %>% rename(conceptId=stratum_1,unitConceptId=stratum_2)
496+
not1<-ddriven %>% anti_join(expert) #49 not in expert
497+
View(not1)
498+
over=expert %>% inner_join(ddriven) #58 overlapping
499+
View(over)
500+
#compare the trehsholds
501+
names(over)
502+
over %>% select(conceptName,plausibleValueLow,min_value)
503+
over %>% select(conceptName,plausibleValueHigh,max_value)
504+
over %>% select(conceptName,unitConceptName,plausibleValueLow,min_value)
505+
over %>% select(conceptName,unitConceptName,plausibleValueLow,min_value)
506+
over %>% select(conceptName,unitConceptName,plausibleValueHigh,max_value)
507+
over %>% select(conceptName,unitConceptName,plausibleValueHigh,max_value) %>% kable()
508+
over %>% select(conceptName,unitConceptName,plausibleValueHigh,max_value) %>% knittr::kable()
509+
over %>% select(conceptName,unitConceptName,plausibleValueHigh,max_value) %>% knitr::kable()
510+
View(expert)
511+
View(over)
512+
View(over)

extras/CentralProcessing.R

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,10 @@ load('o:/athena/concept.rda')
4545
#lkup<-concept %>% filter(vocabulary_id %in% c('CPT4','ICD9Proc','CDT','HCPCS','ICD9CM','ICD10CM','ICD10PCS'))
4646

4747
#reading a single site data (for now)
48-
sfiles<-c('c:/temp/dqd/export/1ThresholdsA.csv','c:/temp/dqd/ThresholdsA.csv')
48+
f<-'d:/OneDrive - National Institutes of Health/temp/dqd/export'
49+
50+
sfiles<-c(file.path(f,'1ThresholdsA.csv'))
51+
sfiles<-c(file.path(f,'1ThresholdsA.csv'),file.path(f,'ThresholdsA.csv'))
4952
ll<-map(sfiles,read_csv)
5053
ll
5154

@@ -61,6 +64,7 @@ names(d)
6164
d2<-d %>% filter(stratum_1 != 0) %>% filter(stratum_2 != 0) %>% left_join(sconcept,by=c('stratum_1'='concept_id')) %>%
6265
left_join(sconcept,by=c('stratum_2'='concept_id'))
6366
names(d2)
67+
6468
#remove columns that are not needed
6569
d3<-d2 %>% select(-stratum_3,-stratum_4,-stratum_5,-p25_value,-p75_value) %>%
6670
filter(count_value >=100 ) %>% arrange(stratum_1,desc(count_value) )
@@ -89,3 +93,52 @@ d4<-d3 %>% select(-count_value,-median_value,-stdev_value,-avg_value,-site)
8993

9094
d4 %>% write_csv('extras/DqdResults/thresholds-list-A.csv')
9195
nrow(d4)
96+
97+
98+
#read DD checks
99+
library(stats);library(tidyverse);library(magrittr)
100+
#message("\n*** Successfully loaded .Rprofile ***\n")
101+
102+
103+
url='https://raw.githubusercontent.com/OHDSI/DataQualityDashboard/master/inst/csv/OMOP_CDMv5.3.1_Concept_Level.csv'
104+
dqd<-read_csv(url)
105+
str(dqd)
106+
names(dqd)
107+
dqd %>% dplyr::filter(cdmTableName=='MEASUREMENT' & cmdFieldName=='MEASUREMENT_CONCEPT_ID' )
108+
dqd %>% dplyr::filter(cdmFieldName=='MEASUREMENT_CONCEPT_ID' ) %>% nrow()
109+
dqd %>% count(cdmTableName,cdmFieldName)
110+
111+
112+
#compare data driven and expert drive sets
113+
d$STRATUM_1 %<>% as.integer()
114+
dqd$unitConceptId %<>% as.integer()
115+
expert <-dqd %>% dplyr::filter(cdmFieldName=='MEASUREMENT_CONCEPT_ID' )
116+
names(expert)
117+
ddriven<-d %>% rename(conceptId=STRATUM_1,unitConceptId=STRATUM_2) %>% select(conceptId,unitConceptId) %>% unique()
118+
119+
names(d2)
120+
ddriven<-d %>% rename(conceptId=STRATUM_1,unitConceptId=STRATUM_2)
121+
ddriven<-d2 %>% rename(conceptId=stratum_1,unitConceptId=stratum_2)
122+
#ddriven %<>% filter(conceptId!=0)
123+
#ddriven %<>% filter(unitConceptId!=0)
124+
125+
over=expert %>% inner_join(ddriven) #58 overlapping
126+
View(over)
127+
expert %>% anti_join(ddriven) #827 are in expert but not in data
128+
129+
not1<-ddriven %>% anti_join(expert) #14 are in data and not in expert
130+
131+
132+
#compare the trehsholds
133+
names(over)
134+
over %>% select(conceptName,unitConceptName,plausibleValueLow,min_value)
135+
over %>% select(conceptName,unitConceptName,plausibleValueHigh,max_value)
136+
#%>% knitr::kable()
137+
138+
139+
#expert thresholds don't follow unit conversion logic (max and min is same even if units indicate order of magniture difference)
140+
#MEASUREMENT MEASUREMENT_CONCEPT_ID 3013721 Aspartate aminotransferase [Enzymatic activity/volume] in Serum or Plasma 8713 gram per deciliter 5 5 2000 5 NA NA NA NA NA NA NA NA
141+
#MEASUREMENT MEASUREMENT_CONCEPT_ID 3013721 Aspartate aminotransferase [Enzymatic activity/volume] in Serum or Plasma 8840 milligram per deciliter 5 5 2000
142+
143+
#5g/dL into mg/dL (is 5000 mg/dL)
144+
#in data is in fact unit/L

0 commit comments

Comments
 (0)