Skip to content

Commit 238ccb8

Browse files
committed
add ensembl ID
1 parent 8ccd7a9 commit 238ccb8

File tree

6 files changed

+16374
-16319
lines changed

6 files changed

+16374
-16319
lines changed

data/mapping_GRCh38_p14_olink.rda

23.6 KB
Binary file not shown.
26.4 KB
Binary file not shown.

inst/data/mapping_GRCh38.p14_olink.txt

Lines changed: 5436 additions & 5436 deletions
Large diffs are not rendered by default.

inst/data/mapping_GRCh38.p14_somalogic.txt

Lines changed: 10878 additions & 10878 deletions
Large diffs are not rendered by default.

inst/scripts/mapping-olink.R

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ biomaRt_getBM_batch <- function(mart, attributes, filters, values, chunk_size) {
9292

9393
# data ====
9494
# HT panel - https://view-su3.highspot.com/viewer/1ca88054dbbf92809402c590b5cb8b39 - I got a csv directly from Olink (the assay list read below is the .xlsx file)
95-
data <- readxl::read_xlsx("/Users/leem/OneDrive - International Agency for Research on Cancer/001_projects/functions/inst/data/Olink Explore HT_Assay list_2024-10-31.xlsx", skip = 6)
95+
data <- readxl::read_xlsx("inst/data/Olink_Explore_HT_Assay_list_2024-10-31.xlsx", skip = 6)
9696

9797
## format data
9898
data <- data %>%
@@ -144,11 +144,31 @@ map_hgnc <- biomaRt_getBM_batch(
144144
chunk_size = 2
145145
)
146146

147+
# Ensembl primary ID ====
148+
ensembl_uniprot <- getBM(
149+
attributes = c("uniprot_gn_id", "ensembl_gene_id", "chromosome_name"),
150+
filters = "uniprot_gn_id",
151+
values = id_uniprot_id,
152+
mart = mart
153+
)
154+
155+
ensembl_hgnc <- getBM(
156+
attributes = c("hgnc_symbol", "ensembl_gene_id", "chromosome_name"),
157+
filters = "hgnc_symbol",
158+
values = id_hgnc_id,
159+
mart = mart
160+
)
161+
162+
map_ensembl <- full_join(ensembl_uniprot, ensembl_hgnc, by = c("ensembl_gene_id", "chromosome_name")) %>%
163+
filter(chromosome_name %in% c(as.character(1:22), "X", "Y"))
164+
147165
# combine maps ====
148166
map <- rbind(map_uniprot,
149167
map_hgnc) %>%
150168
dplyr::distinct()
151169

170+
map <- full_join(map, map_ensembl)
171+
152172
counts <- map %>%
153173
summarise(across(everything(), ~ sum(is.na(.) | . == ""))) %>%
154174
tidyr::pivot_longer(cols = everything(), names_to = "Column", values_to = "MissingCount")
@@ -226,11 +246,15 @@ columns <- c(
226246
"olinkID", "UNIPROT", "Target","TargetFullName",
227247
"uniprot_gn_id", "uniprot_gn_symbol",
228248
"entrezgene_id", "entrezgene_accession",
229-
"hgnc_id", "hgnc_symbol", "external_gene_name", "gene_biotype",
249+
"hgnc_id", "hgnc_symbol", "ensembl_gene_id", "external_gene_name","gene_biotype",
230250
"CHR", "START_hg19", "END_hg19", "strand_hg19", "START_hg38", "END_hg38", "strand_hg38"
231251
)
232252
data_map <- data_map[, columns]
233253

254+
counts <- data_map %>%
255+
summarise(across(everything(), ~ sum(is.na(.) | . == ""))) %>%
256+
tidyr::pivot_longer(cols = everything(), names_to = "Column", values_to = "MissingCount")
257+
234258
## write
235259
write.table(x = data_map,
236260
file = paste0("inst/data/mapping_", VAR_build, "_olink.txt"),

inst/scripts/mapping-somalogic.R

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ biomaRt_getBM_batch <- function(mart, attributes, filters, values, chunk_size) {
9494
# wget https://github.com/SomaLogic/SomaLogic-Data/raw/main/example_data.adat # Retrieve just the 5k (v4.0) ADAT
9595
# wget https://github.com/SomaLogic/SomaLogic-Data/raw/main/example_data_v4.1_plasma.adat # Retrieve just the 7k (v4.1) ADAT
9696
# wget https://github.com/SomaLogic/SomaLogic-Data/raw/main/example_data_v5.0_plasma.adat # Retrieve just the 11k (v5.0) ADAT
97-
data <- SomaDataIO::read_adat("/Users/leem/Downloads/example_data_v5.0_plasma.adat")
97+
data <- SomaDataIO::read_adat("inst/data/example_data_v5.0_plasma.adat")
9898
data <- SomaDataIO::getAnalyteInfo(data)
9999

100100
## format data
@@ -163,17 +163,44 @@ map_hgnc <- biomaRt_getBM_batch(
163163
chunk_size = 2
164164
)
165165

166+
# Ensembl primary ID ====
167+
ensembl_uniprot <- getBM(
168+
attributes = c("uniprot_gn_id", "ensembl_gene_id", "chromosome_name"),
169+
filters = "uniprot_gn_id",
170+
values = id_uniprot_id,
171+
mart = mart
172+
)
173+
174+
ensembl_entrez <- getBM(
175+
attributes = c("entrezgene_id", "ensembl_gene_id", "chromosome_name"),
176+
filters = "entrezgene_id",
177+
values = as.integer(id_entrez_id),
178+
mart = mart
179+
)
180+
181+
ensembl_hgnc <- getBM(
182+
attributes = c("hgnc_symbol", "ensembl_gene_id", "chromosome_name"),
183+
filters = "hgnc_symbol",
184+
values = id_hgnc_id,
185+
mart = mart
186+
)
187+
188+
map_ensembl <- full_join(ensembl_uniprot, ensembl_entrez, by = c("ensembl_gene_id", "chromosome_name")) %>%
189+
full_join(ensembl_hgnc, by = c("ensembl_gene_id", "chromosome_name")) %>%
190+
filter(chromosome_name %in% c(as.character(1:22), "X", "Y"))
191+
166192
# combine maps ====
167193
map <- rbind(map_uniprot,
168194
map_entrez,
169195
map_hgnc) %>%
170196
dplyr::distinct()
171197

198+
map <- full_join(map, map_ensembl)
199+
172200
counts <- map %>%
173201
summarise(across(everything(), ~ sum(is.na(.) | . == ""))) %>%
174202
tidyr::pivot_longer(cols = everything(), names_to = "Column", values_to = "MissingCount")
175203

176-
177204
# positions ====
178205
## hg19 ====
179206
id_hg19 <- subset(genes(TxDb.Hsapiens.UCSC.hg19.knownGene,
@@ -252,11 +279,15 @@ columns <- c(
252279
"SeqId", "SomaId", "UNIPROT", "Target", "TargetFullName", "EntrezGeneID", "EntrezGeneSymbol",
253280
"uniprot_gn_id", "uniprot_gn_symbol",
254281
"entrezgene_id","entrezgene_accession",
255-
"hgnc_id", "hgnc_symbol", "external_gene_name","gene_biotype",
282+
"hgnc_id", "hgnc_symbol", "ensembl_gene_id", "external_gene_name","gene_biotype",
256283
"CHR", "START_hg19", "END_hg19", "strand_hg19","START_hg38", "END_hg38", "strand_hg38"
257284
)
258285
data_map <- data_map[, columns]
259286

287+
counts <- data_map %>%
288+
summarise(across(everything(), ~ sum(is.na(.) | . == ""))) %>%
289+
tidyr::pivot_longer(cols = everything(), names_to = "Column", values_to = "MissingCount")
290+
260291
## write
261292
write.table(x = data_map,
262293
file = paste0("inst/data/mapping_", VAR_build, "_somalogic.txt"),

0 commit comments

Comments
 (0)