Skip to content

Commit e9e86b0

Browse files
committed
fix therapy drug orders
1 parent 6434d53 commit e9e86b0

File tree

4 files changed

+85
-45
lines changed

4 files changed

+85
-45
lines changed

NEWS.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
# Version 2.2.3
2+
3+
* Made the ordering of biomarker-associated therapies uniform
4+
15
# Version 2.2.2
26

37
* Fixed some biomarker drug names

R/sysdata.rda

-5 Bytes
Binary file not shown.

data-raw/biomarker_utilities.R

Lines changed: 80 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1041,76 +1041,98 @@ load_civic_biomarkers <- function(
10411041
therapeutic_contexts <- as.data.frame(
10421042
clinicalEvidenceSummary |>
10431043
dplyr::select(evidence_id, therapies) |>
1044-
dplyr::mutate(therapies = tolower(therapies)) |>
1044+
dplyr::mutate(therapies = stringr::str_replace_all(
1045+
.data$therapies," Anhydrous",""
1046+
)) |>
10451047
dplyr::mutate(
10461048
therapies = stringr::str_replace(
10471049
.data$therapies,
1048-
"futuximab/modotuximab mixture",
1049-
"futuximab,modotuximab")
1050+
"Futuximab/Modotuximab Mixture",
1051+
"Futuximab,Modotuximab")
10501052
) |>
10511053
dplyr::mutate(
10521054
therapies = stringr::str_replace(
10531055
.data$therapies,
1054-
"ipilimumab/nivolumab regimen",
1055-
"ipilimumab,nivolumab")
1056+
"Ipilimumab/Nivolumab Regimen",
1057+
"Ipilimumab,Nivolumab")
10561058
) |>
10571059
dplyr::mutate(
10581060
therapies = stringr::str_replace(
10591061
.data$therapies,
1060-
"durvalumab/tremelimumab regimen",
1061-
"durvalumab,tremelimumab")
1062+
"Durvalumab/Tremelimumab Regimen",
1063+
"Durvalumab,Tremelimumab")
10621064
) |>
10631065
## dabrafenib/trametinib regimen
10641066
dplyr::mutate(
10651067
therapies = stringr::str_replace(
10661068
.data$therapies,
1067-
"dabrafenib/trametinib regimen",
1068-
"dabrafenib,trametinib")
1069+
"Dabrafenib/Trametinib Regimen",
1070+
"Dabrafenib,Trametinib")
10691071
) |>
10701072

10711073
## datopotamab deruxtecan regimen
10721074
dplyr::mutate(
10731075
therapies = stringr::str_replace(
10741076
.data$therapies,
1075-
"datopotamab deruxtecan regimen",
1076-
"datopotamab deruxtecan")
1077+
"Datopotamab Deruxtecan Regimen",
1078+
"Datopotamab Deruxtecan")
10771079
) |>
10781080
## larotrectinib regimen
10791081
dplyr::mutate(
10801082
therapies = stringr::str_replace(
10811083
.data$therapies,
1082-
"larotrectinib regimen",
1083-
"larotrectinib")
1084+
"Larotrectinib Regimen",
1085+
"Larotrectinib")
10841086
) |>
10851087
dplyr::filter(!is.na(therapies) &
10861088
nchar(therapies) > 0) |>
1089+
dplyr::mutate(therapies_lc = tolower(therapies)) |>
1090+
tidyr::separate_rows(therapies_lc, sep = ",") |>
10871091
tidyr::separate_rows(therapies, sep = ",") |>
1092+
dplyr::filter(tolower(therapies) == therapies_lc) |>
1093+
dplyr::distinct() |>
10881094
dplyr::left_join(
10891095
dplyr::select(compound_synonyms,
10901096
alias_lc,
10911097
drug_name,
10921098
molecule_chembl_id),
1093-
by = c("therapies" = "alias_lc"),
1099+
by = c("therapies_lc" = "alias_lc"),
10941100
multiple = "all",
10951101
relationship = "many-to-many"
10961102
) |>
1097-
dplyr::filter(!is.na(drug_name)) |>
1098-
dplyr::select(evidence_id, molecule_chembl_id) |>
1103+
#dplyr::filter(!is.na(drug_name)) |>
1104+
dplyr::select(evidence_id, therapies,
1105+
drug_name,
1106+
molecule_chembl_id) |>
10991107
dplyr::distinct() |>
11001108
dplyr::group_by(evidence_id) |>
1101-
dplyr::summarise(molecule_chembl_id = paste(
1102-
unique(sort(molecule_chembl_id)), collapse = "|"),
1109+
dplyr::summarise(
1110+
drug_name = paste(
1111+
unique(sort(drug_name)), collapse = ","),
1112+
therapeutic_context = paste(
1113+
unique(sort(therapies)), collapse = ","),
1114+
molecule_chembl_id = paste(
1115+
unique(sort(molecule_chembl_id)), collapse = "|"),
11031116
.groups = "drop"
11041117
) |>
1118+
dplyr::filter(!is.na(.data$therapeutic_context) &
1119+
!stringr::str_detect(
1120+
.data$therapeutic_context,"^[0-9]{1,}$") &
1121+
nchar(.data$therapeutic_context) > 2) |>
11051122
dplyr::ungroup() |>
11061123
dplyr::mutate(molecule_chembl_id = stringr::str_replace(
11071124
molecule_chembl_id,"^NA\\||\\|NA$",""
11081125
))
11091126
)
11101127

11111128
clinicalEvidenceSummary <- clinicalEvidenceSummary |>
1129+
dplyr::select(-c("therapies")) |>
11121130
dplyr::left_join(
1113-
therapeutic_contexts,
1131+
dplyr::select(
1132+
therapeutic_contexts,
1133+
evidence_id,
1134+
molecule_chembl_id,
1135+
therapeutic_context),
11141136
by = "evidence_id", multiple = "all",
11151137
relationship = "many-to-many")
11161138

@@ -1656,7 +1678,7 @@ load_civic_biomarkers <- function(
16561678
dplyr::select(molecular_profile_id, molecular_profile_name,
16571679
molecular_profile_summary,
16581680
variant_id, dplyr::everything()) |>
1659-
dplyr::rename(therapeutic_context = therapies) |>
1681+
#dplyr::rename(therapeutic_context = therapies) |>
16601682
dplyr::mutate(evidence_id = paste0("EID",evidence_id)) |>
16611683
dplyr::filter(evidence_id != "EIDNA") |>
16621684
dplyr::mutate(biomarker_source = "civic",
@@ -2214,50 +2236,64 @@ load_cgi_biomarkers <- function(compound_synonyms = NULL,
22142236
dplyr::select(-c(entrezgene, variant_consequence, variant_alias,
22152237
alias_type, gene, symbol, alteration_type,
22162238
alteration)) |>
2217-
dplyr::distinct() |>
2218-
dplyr::mutate(therapeutic_context = dplyr::if_else(
2219-
stringr::str_detect(
2220-
therapeutic_context,
2221-
"(ib|ide|ole|ant|stat|ine|us|one|mab|in|el|[0-9])( | )\\("),
2222-
stringr::str_replace(
2223-
therapeutic_context,
2224-
"( | )\\(.*\\)( )?$",""),
2225-
as.character(therapeutic_context)
2226-
))
2239+
dplyr::distinct()
22272240

22282241

22292242
therapeutic_contexts <- as.data.frame(
22302243
cgi_clinical |>
22312244
dplyr::select(molecular_profile_id, therapeutic_context) |>
2232-
dplyr::filter(!is.na(therapeutic_context) &
2233-
nchar(therapeutic_context) > 0) |>
2234-
tidyr::separate_rows(therapeutic_context, sep = " \\+ ") |>
2235-
dplyr::mutate(therapeutic_context = tolower(therapeutic_context)) |>
2245+
#dplyr::mutate(therapeutic_context = stringr::str_replace(
2246+
# .data$therapeutic_context, " deruxtecan-nxki",""
2247+
#)) |>
2248+
dplyr::mutate(therapeutic_context2 = trimws(
2249+
gsub("\\s*\\([^)]+\\)\\s*", " ", therapeutic_context))) |>
2250+
dplyr::filter(!is.na(therapeutic_context2) &
2251+
nchar(therapeutic_context2) > 0) |>
2252+
dplyr::rename(therapies = therapeutic_context2) |>
2253+
dplyr::mutate(therapies_lc = tolower(therapies)) |>
2254+
tidyr::separate_rows(therapies_lc, sep = " \\+ ") |>
2255+
tidyr::separate_rows(therapies, sep = " \\+ ") |>
2256+
dplyr::filter(tolower(therapies) == therapies_lc) |>
2257+
dplyr::distinct() |>
22362258
dplyr::left_join(
22372259
dplyr::select(compound_synonyms,
2238-
alias_lc,
2239-
drug_name,
2260+
alias_lc,
2261+
drug_name,
22402262
molecule_chembl_id),
2241-
by = c("therapeutic_context" = "alias_lc"),
2242-
multiple = "all", relationship = "many-to-many"
2263+
by = c("therapies_lc" = "alias_lc"),
2264+
multiple = "all",
2265+
relationship = "many-to-many"
22432266
) |>
2244-
dplyr::filter(!is.na(drug_name)) |>
2245-
dplyr::select(molecular_profile_id, molecule_chembl_id) |>
2267+
#dplyr::filter(!is.na(drug_name)) |>
2268+
dplyr::select(molecular_profile_id, therapies,
2269+
drug_name,
2270+
molecule_chembl_id) |>
22462271
dplyr::distinct() |>
22472272
dplyr::group_by(molecular_profile_id) |>
2248-
dplyr::summarise(molecule_chembl_id = paste(
2249-
unique(sort(molecule_chembl_id)), collapse="|"),
2273+
dplyr::summarise(
2274+
drug_name = paste(
2275+
unique(sort(drug_name)), collapse = ","),
2276+
therapeutic_context = paste(
2277+
unique(sort(therapies)), collapse = ","),
2278+
molecule_chembl_id = paste(
2279+
unique(sort(molecule_chembl_id)), collapse = "|"),
22502280
.groups = "drop"
22512281
) |>
22522282
dplyr::ungroup() |>
22532283
dplyr::mutate(molecule_chembl_id = stringr::str_replace(
22542284
molecule_chembl_id,"^NA\\||\\|NA$",""
2255-
))
2285+
))
22562286
)
22572287

22582288
cgi_clinical <- cgi_clinical |>
2289+
dplyr::select(-c("therapeutic_context")) |>
22592290
dplyr::left_join(
2260-
therapeutic_contexts, by = "molecular_profile_id",
2291+
dplyr::select(
2292+
therapeutic_contexts,
2293+
c("molecular_profile_id",
2294+
"therapeutic_context",
2295+
"molecule_chembl_id")),
2296+
by = "molecular_profile_id",
22612297
multiple = "all", relationship = "many-to-many") |>
22622298
dplyr::mutate(evidence_url = "https://www.cancergenomeinterpreter.org/biomarkers") |>
22632299
dplyr::mutate(biomarker_source = "cgi") |>

data-raw/data-raw.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ biomarkers[['metadata']] <- metadata$biomarkers
217217
# substr(as.character(packageVersion("pharmOncoX")),1,4),
218218
# as.character(as.integer(substr(as.character(packageVersion("pharmOncoX")),5,5)) + 1))
219219

220-
version_bump <- "2.2.2"
220+
version_bump <- "2.2.3"
221221

222222
db <- list()
223223
db[['biomarkers']] <- biomarkers

0 commit comments

Comments
 (0)