Skip to content

Commit f4862ee

Browse files
committed
fix fusion partner mappings
1 parent 0a6fde4 commit f4862ee

File tree

3 files changed

+63
-5
lines changed

3 files changed

+63
-5
lines changed

NEWS.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
# Version 2.2.1
2+
3+
* Fixed mapping of fusion partner Entrez gene
4+
identifiers
5+
16
# Version 2.1.9
27

38
* CIViC update (20260216)

data-raw/biomarker_utilities.R

Lines changed: 57 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -793,6 +793,54 @@ expand_hgvs_terms <- function(var, aa_dict, add_codon_markers = FALSE) {
793793
return(hits)
794794
}
795795

796+
append_fusion_entrezgene <- function(df, gene_lookup) {
  ## Fill in missing Entrez gene identifiers for fusion records.
  ##
  ## For rows where alteration_type == "FUSION" and entrezgene is NA, the
  ## 'gene' column (e.g. "ERG::TMPRSS2") is split on "::" and every fusion
  ## partner is looked up in 'gene_lookup'. The identifiers are joined with
  ## "::" in the same partner order (e.g. "2078::7113"). A partner that
  ## cannot be resolved is written as "." (e.g. "613::."). If no partner can
  ## be resolved, or 'gene' itself is NA, entrezgene stays NA.
  ##
  ## df:          data frame with columns 'gene', 'alteration_type' and
  ##              'entrezgene'
  ## gene_lookup: data frame with columns 'alias' and 'entrezgene'; rows
  ##              with NA entrezgene are ignored, and when an alias occurs
  ##              more than once the first remaining row wins
  ##
  ## Returns 'df' untouched when there is nothing to fill; otherwise 'df'
  ## with 'entrezgene' as a character column and the gaps filled in.

  alteration_type <- df[["alteration_type"]]
  fusion_na_mask <- !is.na(alteration_type) &
    alteration_type == "FUSION" &
    is.na(df[["entrezgene"]])

  if (!any(fusion_na_mask)) {
    return(df)
  }

  ## From here on we are going to write to 'df', so fail with a clear
  ## message rather than an opaque length-mismatch / unknown-column error.
  if (!("gene" %in% names(df))) {
    stop("append_fusion_entrezgene: 'df' has no 'gene' column",
         call. = FALSE)
  }
  missing_cols <- setdiff(c("alias", "entrezgene"), names(gene_lookup))
  if (length(missing_cols) > 0) {
    stop("append_fusion_entrezgene: 'gene_lookup' is missing column(s): ",
         paste(missing_cols, collapse = ", "), call. = FALSE)
  }

  ## Named lookup vector: alias -> entrezgene (character), one entry per
  ## alias (first occurrence among rows with a non-NA identifier).
  has_id <- !is.na(gene_lookup[["entrezgene"]])
  aliases <- as.character(gene_lookup[["alias"]][has_id])
  ids <- as.character(gene_lookup[["entrezgene"]][has_id])
  first_hit <- !duplicated(aliases)
  lookup_vec <- stats::setNames(ids[first_hit], aliases[first_hit])

  ## Translate one "A::B" string into "idA::idB".
  map_partners <- function(g) {
    ## strsplit() drops a trailing empty field; appending one delimiter
    ## keeps it, so "A::" still yields two partners ("A" and "").
    parts <- strsplit(paste0(g, "::"), "::", fixed = TRUE)[[1]]
    ## Character indexing is vectorised and gives NA for unknown names.
    partner_ids <- unname(lookup_vec[parts])
    if (all(is.na(partner_ids))) {
      return(NA_character_)
    }
    partner_ids[is.na(partner_ids)] <- "."
    paste(partner_ids, collapse = "::")
  }

  ## Resolve each distinct fusion string once, then spread the result back
  ## over the rows; NA genes fall through match() as NA.
  genes <- as.character(df[["gene"]][fusion_na_mask])
  unique_genes <- unique(genes[!is.na(genes)])
  mapped <- vapply(unique_genes, map_partners, character(1),
                   USE.NAMES = FALSE)
  filled <- mapped[match(genes, unique_genes)]

  ## Joined identifiers are strings, so the column has to be character;
  ## convert explicitly instead of relying on coercion during assignment.
  entrez <- as.character(df[["entrezgene"]])
  entrez[fusion_na_mask] <- filled
  df[["entrezgene"]] <- entrez
  df
}
842+
843+
796844
load_civic_biomarkers <- function(
797845
datestamp = '20251018',
798846
compound_synonyms = NULL,
@@ -1556,9 +1604,12 @@ load_civic_biomarkers <- function(
15561604
TRUE ~ as.character(variant_alias)
15571605
)) |>
15581606
dplyr::distinct()
1559-
1560-
biomarker_items[['clinical']] <-
1561-
clinicalEvidenceSummary |>
1607+
1608+
biomarker_items[['variant']] <- append_fusion_entrezgene(
1609+
biomarker_items[['variant']], gene_aliases)
1610+
1611+
biomarker_items[['clinical']] <-
1612+
clinicalEvidenceSummary |>
15621613
dplyr::full_join(
15631614
molecularProfileSummary, by = "molecular_profile_id",
15641615
multiple = "all", relationship = "many-to-many") |>
@@ -2105,7 +2156,9 @@ load_cgi_biomarkers <- function(compound_synonyms = NULL,
21052156
entrezgene,
21062157
) |>
21072158
dplyr::distinct()
2108-
2159+
2160+
cgi_variants <- append_fusion_entrezgene(
2161+
cgi_variants, gene_alias$records)
21092162

21102163
cgi_clinical <- cgi_biomarkers |>
21112164

data-raw/data-raw.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ biomarkers[['metadata']] <- metadata$biomarkers
217217
# substr(as.character(packageVersion("pharmOncoX")),1,4),
218218
# as.character(as.integer(substr(as.character(packageVersion("pharmOncoX")),5,5)) + 1))
219219

220-
version_bump <- "2.2.0"
220+
version_bump <- "2.2.1"
221221

222222
db <- list()
223223
db[['biomarkers']] <- biomarkers

0 commit comments

Comments
 (0)