@@ -793,6 +793,54 @@ expand_hgvs_terms <- function(var, aa_dict, add_codon_markers = FALSE) {
793793 return (hits )
794794}
795795
append_fusion_entrezgene <- function(df, gene_lookup) {
  ## Fill in 'entrezgene' for FUSION records where it is missing.
  ##
  ## For every row of 'df' with alteration_type == "FUSION" and an NA
  ## 'entrezgene', the 'gene' column (e.g. "ERG::TMPRSS2") is split on "::"
  ## and each fusion partner is looked up in 'gene_lookup'. The partner
  ## identifiers are joined back with "::", in the same order as the
  ## partners (i.e. "<id of ERG>::<id of TMPRSS2>").
  ##
  ## Arguments:
  ##   df:          data frame with columns 'gene', 'alteration_type' and
  ##                'entrezgene'
  ##   gene_lookup: data frame with columns 'alias' and 'entrezgene'
  ##
  ## Value:
  ##   'df' with 'entrezgene' filled for the matching rows. If at least one
  ##   row matches, the values written are character strings, so the
  ##   'entrezgene' column is coerced to character. If no row matches, 'df'
  ##   is returned unchanged.
  ##
  ## Details:
  ##   - a partner without a lookup hit is written as "." (e.g. "2078::.")
  ##   - if no partner can be resolved, or 'gene' is NA, the value stays NA
  ##   - when an alias maps to several identifiers, the first one wins

  ## %in% never yields NA, so rows with a missing alteration_type are
  ## simply excluded from the mask
  fusion_na_mask <- df$alteration_type %in% "FUSION" & is.na(df$entrezgene)

  if (!any(fusion_na_mask)) {
    return(df)
  }

  ## Build a named lookup vector: alias -> entrezgene (character), keeping
  ## the first non-missing identifier seen for each alias
  usable <- !is.na(gene_lookup$alias) & !is.na(gene_lookup$entrezgene)
  alias <- as.character(gene_lookup$alias[usable])
  entrez <- as.character(gene_lookup$entrezgene[usable])
  first_hit <- !duplicated(alias)
  lookup_vec <- stats::setNames(entrez[first_hit], alias[first_hit])

  resolve_fusion <- function(g) {
    if (is.na(g)) {
      return(NA_character_)
    }

    ## strsplit() drops a trailing empty field; appending one separator
    ## keeps empty partners (e.g. "ERG::") visible as "" so that they are
    ## reported as unresolved instead of silently disappearing
    partners <- strsplit(paste0(g, "::"), "::", fixed = TRUE)[[1]]

    ## Indexing by name gives NA for unknown (or empty) partner names
    ids <- unname(lookup_vec[partners])

    if (all(is.na(ids))) {
      return(NA_character_)
    }
    ids[is.na(ids)] <- "."
    paste(ids, collapse = "::")
  }

  filled <- vapply(
    as.character(df$gene[fusion_na_mask]),
    resolve_fusion,
    character(1),
    USE.NAMES = FALSE
  )

  df$entrezgene[fusion_na_mask] <- filled
  df
}
842+
843+
796844load_civic_biomarkers <- function (
797845 datestamp = ' 20251018' ,
798846 compound_synonyms = NULL ,
@@ -1556,9 +1604,12 @@ load_civic_biomarkers <- function(
15561604 TRUE ~ as.character(variant_alias )
15571605 )) | >
15581606 dplyr :: distinct()
1559-
1560- biomarker_items [[' clinical' ]] <-
1561- clinicalEvidenceSummary | >
1607+
1608+ biomarker_items [[' variant' ]] <- append_fusion_entrezgene(
1609+ biomarker_items [[' variant' ]], gene_aliases )
1610+
1611+ biomarker_items [[' clinical' ]] <-
1612+ clinicalEvidenceSummary | >
15621613 dplyr :: full_join(
15631614 molecularProfileSummary , by = " molecular_profile_id" ,
15641615 multiple = " all" , relationship = " many-to-many" ) | >
@@ -2105,7 +2156,9 @@ load_cgi_biomarkers <- function(compound_synonyms = NULL,
21052156 entrezgene ,
21062157 ) | >
21072158 dplyr :: distinct()
2108-
2159+
2160+ cgi_variants <- append_fusion_entrezgene(
2161+ cgi_variants , gene_alias $ records )
21092162
21102163 cgi_clinical <- cgi_biomarkers | >
21112164
0 commit comments