@@ -1041,76 +1041,98 @@ load_civic_biomarkers <- function(
10411041 therapeutic_contexts <- as.data.frame(
10421042 clinicalEvidenceSummary | >
10431043 dplyr :: select(evidence_id , therapies ) | >
1044- dplyr :: mutate(therapies = tolower(therapies )) | >
1044+ dplyr :: mutate(therapies = stringr :: str_replace_all(
1045+ .data $ therapies ," Anhydrous" ," "
1046+ )) | >
10451047 dplyr :: mutate(
10461048 therapies = stringr :: str_replace(
10471049 .data $ therapies ,
1048- " futuximab/modotuximab mixture " ,
1049- " futuximab,modotuximab " )
1050+ " Futuximab/Modotuximab Mixture " ,
1051+ " Futuximab,Modotuximab " )
10501052 ) | >
10511053 dplyr :: mutate(
10521054 therapies = stringr :: str_replace(
10531055 .data $ therapies ,
1054- " ipilimumab/nivolumab regimen " ,
1055- " ipilimumab,nivolumab " )
1056+ " Ipilimumab/Nivolumab Regimen " ,
1057+ " Ipilimumab,Nivolumab " )
10561058 ) | >
10571059 dplyr :: mutate(
10581060 therapies = stringr :: str_replace(
10591061 .data $ therapies ,
1060- " durvalumab/tremelimumab regimen " ,
1061- " durvalumab,tremelimumab " )
1062+ " Durvalumab/Tremelimumab Regimen " ,
1063+ " Durvalumab,Tremelimumab " )
10621064 ) | >
10631065 # # dabrafenib/trametinib regimen
10641066 dplyr :: mutate(
10651067 therapies = stringr :: str_replace(
10661068 .data $ therapies ,
1067- " dabrafenib/trametinib regimen " ,
1068- " dabrafenib,trametinib " )
1069+ " Dabrafenib/Trametinib Regimen " ,
1070+ " Dabrafenib,Trametinib " )
10691071 ) | >
10701072
10711073 # # datopotamab deruxtecan regimen
10721074 dplyr :: mutate(
10731075 therapies = stringr :: str_replace(
10741076 .data $ therapies ,
1075- " datopotamab deruxtecan regimen " ,
1076- " datopotamab deruxtecan " )
1077+ " Datopotamab Deruxtecan Regimen " ,
1078+ " Datopotamab Deruxtecan " )
10771079 ) | >
10781080 # # larotrectinib regimen
10791081 dplyr :: mutate(
10801082 therapies = stringr :: str_replace(
10811083 .data $ therapies ,
1082- " larotrectinib regimen " ,
1083- " larotrectinib " )
1084+ " Larotrectinib Regimen " ,
1085+ " Larotrectinib " )
10841086 ) | >
10851087 dplyr :: filter(! is.na(therapies ) &
10861088 nchar(therapies ) > 0 ) | >
1089+ dplyr :: mutate(therapies_lc = tolower(therapies )) | >
1090+ tidyr :: separate_rows(therapies_lc , sep = " ," ) | >
10871091 tidyr :: separate_rows(therapies , sep = " ," ) | >
1092+ dplyr :: filter(tolower(therapies ) == therapies_lc ) | >
1093+ dplyr :: distinct() | >
10881094 dplyr :: left_join(
10891095 dplyr :: select(compound_synonyms ,
10901096 alias_lc ,
10911097 drug_name ,
10921098 molecule_chembl_id ),
1093- by = c(" therapies " = " alias_lc" ),
1099+ by = c(" therapies_lc " = " alias_lc" ),
10941100 multiple = " all" ,
10951101 relationship = " many-to-many"
10961102 ) | >
1097- dplyr :: filter(! is.na(drug_name )) | >
1098- dplyr :: select(evidence_id , molecule_chembl_id ) | >
1103+ # dplyr::filter(!is.na(drug_name)) |>
1104+ dplyr :: select(evidence_id , therapies ,
1105+ drug_name ,
1106+ molecule_chembl_id ) | >
10991107 dplyr :: distinct() | >
11001108 dplyr :: group_by(evidence_id ) | >
1101- dplyr :: summarise(molecule_chembl_id = paste(
1102- unique(sort(molecule_chembl_id )), collapse = " |" ),
1109+ dplyr :: summarise(
1110+ drug_name = paste(
1111+ unique(sort(drug_name )), collapse = " ," ),
1112+ therapeutic_context = paste(
1113+ unique(sort(therapies )), collapse = " ," ),
1114+ molecule_chembl_id = paste(
1115+ unique(sort(molecule_chembl_id )), collapse = " |" ),
11031116 .groups = " drop"
11041117 ) | >
1118+ dplyr :: filter(! is.na(.data $ therapeutic_context ) &
1119+ ! stringr :: str_detect(
1120+ .data $ therapeutic_context ," ^[0-9]{1,}$" ) &
1121+ nchar(.data $ therapeutic_context ) > 2 ) | >
11051122 dplyr :: ungroup() | >
11061123 dplyr :: mutate(molecule_chembl_id = stringr :: str_replace(
11071124 molecule_chembl_id ," ^NA\\ ||\\ |NA$" ," "
11081125 ))
11091126 )
11101127
11111128 clinicalEvidenceSummary <- clinicalEvidenceSummary | >
1129+ dplyr :: select(- c(" therapies" )) | >
11121130 dplyr :: left_join(
1113- therapeutic_contexts ,
1131+ dplyr :: select(
1132+ therapeutic_contexts ,
1133+ evidence_id ,
1134+ molecule_chembl_id ,
1135+ therapeutic_context ),
11141136 by = " evidence_id" , multiple = " all" ,
11151137 relationship = " many-to-many" )
11161138
@@ -1656,7 +1678,7 @@ load_civic_biomarkers <- function(
16561678 dplyr :: select(molecular_profile_id , molecular_profile_name ,
16571679 molecular_profile_summary ,
16581680 variant_id , dplyr :: everything()) | >
1659- dplyr :: rename(therapeutic_context = therapies ) | >
1681+ # dplyr::rename(therapeutic_context = therapies) |>
16601682 dplyr :: mutate(evidence_id = paste0(" EID" ,evidence_id )) | >
16611683 dplyr :: filter(evidence_id != " EIDNA" ) | >
16621684 dplyr :: mutate(biomarker_source = " civic" ,
@@ -2214,50 +2236,64 @@ load_cgi_biomarkers <- function(compound_synonyms = NULL,
22142236 dplyr :: select(- c(entrezgene , variant_consequence , variant_alias ,
22152237 alias_type , gene , symbol , alteration_type ,
22162238 alteration )) | >
2217- dplyr :: distinct() | >
2218- dplyr :: mutate(therapeutic_context = dplyr :: if_else(
2219- stringr :: str_detect(
2220- therapeutic_context ,
2221- " (ib|ide|ole|ant|stat|ine|us|one|mab|in|el|[0-9])( | )\\ (" ),
2222- stringr :: str_replace(
2223- therapeutic_context ,
2224- " ( | )\\ (.*\\ )( )?$" ," " ),
2225- as.character(therapeutic_context )
2226- ))
2239+ dplyr :: distinct()
22272240
22282241
22292242 therapeutic_contexts <- as.data.frame(
22302243 cgi_clinical | >
22312244 dplyr :: select(molecular_profile_id , therapeutic_context ) | >
2232- dplyr :: filter(! is.na(therapeutic_context ) &
2233- nchar(therapeutic_context ) > 0 ) | >
2234- tidyr :: separate_rows(therapeutic_context , sep = " \\ + " ) | >
2235- dplyr :: mutate(therapeutic_context = tolower(therapeutic_context )) | >
2245+ # dplyr::mutate(therapeutic_context = stringr::str_replace(
2246+ # .data$therapeutic_context, " deruxtecan-nxki",""
2247+ # )) |>
2248+ dplyr :: mutate(therapeutic_context2 = trimws(
2249+ gsub(" \\ s*\\ ([^)]+\\ )\\ s*" , " " , therapeutic_context ))) | >
2250+ dplyr :: filter(! is.na(therapeutic_context2 ) &
2251+ nchar(therapeutic_context2 ) > 0 ) | >
2252+ dplyr :: rename(therapies = therapeutic_context2 ) | >
2253+ dplyr :: mutate(therapies_lc = tolower(therapies )) | >
2254+ tidyr :: separate_rows(therapies_lc , sep = " \\ + " ) | >
2255+ tidyr :: separate_rows(therapies , sep = " \\ + " ) | >
2256+ dplyr :: filter(tolower(therapies ) == therapies_lc ) | >
2257+ dplyr :: distinct() | >
22362258 dplyr :: left_join(
22372259 dplyr :: select(compound_synonyms ,
2238- alias_lc ,
2239- drug_name ,
2260+ alias_lc ,
2261+ drug_name ,
22402262 molecule_chembl_id ),
2241- by = c(" therapeutic_context" = " alias_lc" ),
2242- multiple = " all" , relationship = " many-to-many"
2263+ by = c(" therapies_lc" = " alias_lc" ),
2264+ multiple = " all" ,
2265+ relationship = " many-to-many"
22432266 ) | >
2244- dplyr :: filter(! is.na(drug_name )) | >
2245- dplyr :: select(molecular_profile_id , molecule_chembl_id ) | >
2267+ # dplyr::filter(!is.na(drug_name)) |>
2268+ dplyr :: select(molecular_profile_id , therapies ,
2269+ drug_name ,
2270+ molecule_chembl_id ) | >
22462271 dplyr :: distinct() | >
22472272 dplyr :: group_by(molecular_profile_id ) | >
2248- dplyr :: summarise(molecule_chembl_id = paste(
2249- unique(sort(molecule_chembl_id )), collapse = " |" ),
2273+ dplyr :: summarise(
2274+ drug_name = paste(
2275+ unique(sort(drug_name )), collapse = " ," ),
2276+ therapeutic_context = paste(
2277+ unique(sort(therapies )), collapse = " ," ),
2278+ molecule_chembl_id = paste(
2279+ unique(sort(molecule_chembl_id )), collapse = " |" ),
22502280 .groups = " drop"
22512281 ) | >
22522282 dplyr :: ungroup() | >
22532283 dplyr :: mutate(molecule_chembl_id = stringr :: str_replace(
22542284 molecule_chembl_id ," ^NA\\ ||\\ |NA$" ," "
2255- ))
2285+ ))
22562286 )
22572287
22582288 cgi_clinical <- cgi_clinical | >
2289+ dplyr :: select(- c(" therapeutic_context" )) | >
22592290 dplyr :: left_join(
2260- therapeutic_contexts , by = " molecular_profile_id" ,
2291+ dplyr :: select(
2292+ therapeutic_contexts ,
2293+ c(" molecular_profile_id" ,
2294+ " therapeutic_context" ,
2295+ " molecule_chembl_id" )),
2296+ by = " molecular_profile_id" ,
22612297 multiple = " all" , relationship = " many-to-many" ) | >
22622298 dplyr :: mutate(evidence_url = " https://www.cancergenomeinterpreter.org/biomarkers" ) | >
22632299 dplyr :: mutate(biomarker_source = " cgi" ) | >
0 commit comments