Skip to content

Commit 60394e9

Browse files
authored
Enhance UniProt ID handling (#36)
- add support for sp/tr annotations
- handle different formats in ID mapping
1 parent 5c2fca6 commit 60394e9

File tree

2 files changed

+29
-10
lines changed

2 files changed

+29
-10
lines changed

src/naming_conventions.jl

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@ julia> uniprotX("Q8VGW6_MOUSE/31-308")
6464
```
6565
"""
6666
function uniprotX(name::AbstractString)::AbstractString
67+
if startswith(name, "tr|") || startswith(name, "sp|")
68+
name = name[4:end]
69+
end
6770
m = match(rex_uniprotX_Swiss, name)
6871
m !== nothing && return m.captures[1]
6972
m = match(rex_uniprot_accession, name)
@@ -101,7 +104,7 @@ function query_uniprot_accession(id)
101104
end
102105

103106
"""
104-
jobID = GPCRAnalysis.map_uniprot_submit(ids, from, to="UniProtKB")
107+
jobID = GPCRAnalysis.map_uniprot_submit(ids, from="UniProtKB_AC-ID", to="UniProtKB")
105108
106109
Submit a list of `ids` to the UniProt ID mapping service, to convert from ID convention `from` to `to`.
107110
The jobID can be used to check the status (`map_uniprot_status`) and retrieve the results (`map_uniprot_retrieve`).
@@ -111,24 +114,24 @@ The jobID can be used to check the status (`map_uniprot_status`) and retrieve th
111114
julia> jobID = GPCRAnalysis.map_uniprot_submit(["ENSMUSG00000067064", "ENSMUSG00000057464"], "Ensembl");
112115
```
113116
"""
114-
function map_uniprot_submit(ids::AbstractString, from::AbstractString, to::AbstractString="UniProtKB")
117+
function map_uniprot_submit(ids::AbstractString, from::AbstractString="UniProtKB_AC-ID", to::AbstractString="UniProtKB")
115118
resp = HTTP.post("https://rest.uniprot.org/idmapping/run", [],
116-
Dict("from" => from, "to" => "UniProtKB", "ids" => ids))
119+
Dict("from" => from, "to" => to, "ids" => ids))
117120
if resp.status == 200
118121
return JSON3.read(String(resp.body))["jobId"]
119122
end
120123
return nothing
121124
end
122-
map_uniprot_submit(ids::AbstractVector, from::AbstractString) = map_uniprot_submit(join(ids, ','), from)
125+
map_uniprot_submit(ids::AbstractVector, args...) = map_uniprot_submit(join(ids, ','), args...)
123126

124127
"""
125-
status = GPCRAnalysis.map_uniprot_status(jobId)
128+
status = GPCRAnalysis.map_uniprot_status(jobID)
126129
127130
Check the status of a UniProt ID mapping job. Returns `true` if the results are
128131
ready. Otherwise, returns the status object.
129132
"""
130-
function map_uniprot_status(jobId)
131-
resp = HTTP.get("https://rest.uniprot.org/idmapping/status/$jobId", ["Accept" => "application/json"])
133+
function map_uniprot_status(jobID)
134+
resp = HTTP.get("https://rest.uniprot.org/idmapping/status/$jobID", ["Accept" => "application/json"]; decompress = true)
132135
if resp.status == 200
133136
status = JSON3.read(String(HTTP.decode(resp)))
134137
haskey(status, "results") && return true
@@ -138,12 +141,12 @@ function map_uniprot_status(jobId)
138141
end
139142

140143
"""
141-
result = GPCRAnalysis.map_uniprot_retrieve(jobId)
144+
result = GPCRAnalysis.map_uniprot_retrieve(jobID)
142145
143146
Retrieve the results of a UniProt ID mapping job.
144147
"""
145-
function map_uniprot_retrieve(jobId)
146-
resp = HTTP.get("https://rest.uniprot.org/idmapping/stream/$jobId", ["Accept" => "application/json"])
148+
function map_uniprot_retrieve(jobID)
149+
resp = HTTP.get("https://rest.uniprot.org/idmapping/stream/$jobID", ["Accept" => "application/json"]; decompress = true)
147150
if resp.status == 200
148151
return JSON3.read(String(HTTP.decode(resp)))
149152
end

test/runtests.jl

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ using Test
3030
@test_throws ErrorException uniprotX("Q8VGW67_MOUSE/31-308")
3131
@test_throws ErrorException uniprotX("q8vgw6_MOUSE/31-308")
3232
@test_throws ErrorException uniprotX("QV8GW6_MOUSE/31-308")
33+
# FASTA
34+
@test @inferred(uniprotX("tr|M3WEA8|M3WEA8_FELCA Olfactory receptor family 51 subfamily S member 1 OS=Felis catus OX=9685 GN=OR51S1 PE=4 SV=2")) == "M3WEA8"
3335

3436
# Versioned
3537
@test uniprotX("Q8VGW6.37") == "Q8VGW6"
@@ -299,6 +301,18 @@ using Test
299301
@testset "Uniprot" begin
300302
@test query_uniprot_accession("T2R38_MOUSE") == "Q7TQA6"
301303

304+
jobID = GPCRAnalysis.map_uniprot_submit(["T2R38_MOUSE"])
305+
tstart = time()
306+
sleep(1)
307+
status = false
308+
while status != true && time() < tstart + 20
309+
sleep(1)
310+
status = GPCRAnalysis.map_uniprot_status(jobID)
311+
end
312+
results = GPCRAnalysis.map_uniprot_retrieve(jobID)[:results]
313+
resultsdict = Dict(obj["from"] => obj["to"] for obj in results)
314+
@test resultsdict["T2R38_MOUSE"] == "Q7TQA6"
315+
302316
jobID = GPCRAnalysis.map_uniprot_submit(["ENSMUSG00000067064", "ENSMUSG00000057464"], "Ensembl")
303317
tstart = time()
304318
sleep(1)
@@ -310,6 +324,8 @@ using Test
310324
results = GPCRAnalysis.map_uniprot_retrieve(jobID)[:results]
311325
resultsdict = Dict(obj["from"] => obj["to"] for obj in results)
312326
@test resultsdict["ENSMUSG00000067064"] == "Q8VGU4"
327+
328+
313329
end
314330

315331
@testset "EBI and NCBI" begin

0 commit comments

Comments
 (0)