Skip to content

Commit ccd93b3

Browse files
committed
fix: enhance error handling in get_allele_registry_associations function
1 parent e75c25f commit ccd93b3

File tree

3 files changed

+61
-18
lines changed

3 files changed

+61
-18
lines changed

src/mavedb/lib/clingen/services.py

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,17 @@
11
import hashlib
22
import logging
3-
import requests
43
import os
54
import time
65
from datetime import datetime
7-
from typing import Optional
6+
from typing import Optional, Union
87
from urllib import parse
98

10-
9+
import requests
1110
from jose import jwt
1211

13-
from mavedb.lib.logging.context import logging_context, save_to_logging_context, format_raised_exception_info_as_dict
1412
from mavedb.lib.clingen.constants import GENBOREE_ACCOUNT_NAME, GENBOREE_ACCOUNT_PASSWORD, LDH_MAVE_ACCESS_ENDPOINT
15-
16-
from mavedb.lib.types.clingen import LdhSubmission, ClinGenAllele
13+
from mavedb.lib.logging.context import format_raised_exception_info_as_dict, logging_context, save_to_logging_context
14+
from mavedb.lib.types.clingen import ClinGenAllele, ClinGenSubmissionError, LdhSubmission
1715
from mavedb.lib.utils import batched
1816

1917
logger = logging.getLogger(__name__)
@@ -71,7 +69,9 @@ def construct_auth_url(self, url: str) -> str:
7169
token = hashlib.sha1((url + identity + gbTime).encode("utf-8")).hexdigest()
7270
return url + "&gbLogin=" + GENBOREE_ACCOUNT_NAME + "&gbTime=" + gbTime + "&gbToken=" + token
7371

74-
def dispatch_submissions(self, content_submissions: list[str]) -> list[ClinGenAllele]:
72+
def dispatch_submissions(
73+
self, content_submissions: list[str]
74+
) -> list[Union[ClinGenAllele, ClinGenSubmissionError]]:
7575
save_to_logging_context({"car_submission_count": len(content_submissions)})
7676

7777
try:
@@ -89,7 +89,7 @@ def dispatch_submissions(self, content_submissions: list[str]) -> list[ClinGenAl
8989
logger.error(msg="Failed to dispatch CAR submission.", exc_info=exc, extra=logging_context())
9090
return []
9191

92-
response_data: list[ClinGenAllele] = response.json()
92+
response_data: list[Union[ClinGenAllele, ClinGenSubmissionError]] = response.json()
9393
save_to_logging_context({"car_submission_response_count": len(response_data)})
9494
logger.info(msg="Successfully dispatched CAR submission.", extra=logging_context())
9595

@@ -324,7 +324,7 @@ def clingen_allele_id_from_ldh_variation(variation: Optional[dict]) -> Optional[
324324

325325

326326
def get_allele_registry_associations(
327-
content_submissions: list[str], submission_response: list[ClinGenAllele]
327+
content_submissions: list[str], submission_response: list[Union[ClinGenAllele, ClinGenSubmissionError]]
328328
) -> dict[str, str]:
329329
"""
330330
Links HGVS strings and ClinGen Canonical Allele IDs (CAIDs) given a list of both.
@@ -360,6 +360,13 @@ def get_allele_registry_associations(
360360

361361
allele_registry_associations: dict[str, str] = {}
362362
for registration in submission_response:
363+
if "errorType" in registration:
364+
logger.warning(
365+
msg=f"Skipping errored ClinGen Allele Registry HGVS {registration.get('hgvs', 'unknown')} ({registration.get('errorType', 'unknown')}): {registration.get('message', 'unknown error message')}",
366+
extra=logging_context(),
367+
)
368+
continue
369+
363370
# Extract the CAID from the URL (e.g., "http://reg.test.genome.network/allele/CA2513066" -> "CA2513066")
364371
caid = registration["@id"].split("/")[-1]
365372
alleles = registration.get("genomicAlleles", []) + registration.get("transcriptAlleles", [])

src/mavedb/lib/types/clingen.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
from typing import Any, Optional, TypedDict, Literal
2-
from typing_extensions import NotRequired
1+
from typing import Any, Literal, Optional, TypedDict
32

3+
from typing_extensions import NotRequired
44

55
# See: https://ldh.genome.network/docs/ldh/submit.html#content-submission-body
66

@@ -152,3 +152,15 @@ class ClinGenAlleleDefinition(TypedDict):
152152
"aminoAcidAlleles": NotRequired[list[ClinGenAlleleDefinition]],
153153
},
154154
)
155+
156+
ClinGenSubmissionError = TypedDict(
157+
"ClinGenSubmissionError",
158+
{
159+
"description": str,
160+
"errorType": str,
161+
"hgvs": str,
162+
"inputLine": str,
163+
"message": str,
164+
"position": str,
165+
},
166+
)

tests/lib/clingen/test_services.py

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,26 @@
11
# ruff: noqa: E402
22

33
import os
4-
import pytest
5-
import requests
64
from datetime import datetime
7-
from unittest.mock import patch, MagicMock
5+
from unittest.mock import MagicMock, patch
86
from urllib import parse
97

8+
import pytest
9+
import requests
10+
1011
arq = pytest.importorskip("arq")
1112
cdot = pytest.importorskip("cdot")
1213
fastapi = pytest.importorskip("fastapi")
1314

14-
from mavedb.lib.clingen.constants import LDH_MAVE_ACCESS_ENDPOINT, GENBOREE_ACCOUNT_NAME, GENBOREE_ACCOUNT_PASSWORD
15-
from mavedb.lib.utils import batched
15+
from mavedb.lib.clingen.constants import GENBOREE_ACCOUNT_NAME, GENBOREE_ACCOUNT_PASSWORD, LDH_MAVE_ACCESS_ENDPOINT
1616
from mavedb.lib.clingen.services import (
1717
ClinGenAlleleRegistryService,
1818
ClinGenLdhService,
19-
get_clingen_variation,
2019
clingen_allele_id_from_ldh_variation,
2120
get_allele_registry_associations,
21+
get_clingen_variation,
2222
)
23-
23+
from mavedb.lib.utils import batched
2424
from tests.helpers.constants import VALID_CLINGEN_CA_ID
2525

2626
TEST_CLINGEN_URL = "https://pytest.clingen.com"
@@ -365,3 +365,27 @@ def test_get_allele_registry_associations_no_match():
365365
]
366366
result = get_allele_registry_associations(content_submissions, submission_response)
367367
assert result == {}
368+
369+
370+
def test_get_allele_registry_associations_mixed():
    """Errored entries in a mixed response are skipped; successful ones are still linked.

    The response interleaves two successful registrations (one matched through
    ``genomicAlleles``, one through ``transcriptAlleles``) with a single error entry.
    Only the two successfully registered HGVS strings should be mapped to their CAIDs.
    """
    genomic_registration = {
        "@id": "http://reg.test.genome.network/allele/CA123",
        "genomicAlleles": [{"hgvs": "NM_0001:c.1A>G"}],
        "transcriptAlleles": [],
    }
    # An error entry carries "errorType" and no "@id".
    errored_registration = {
        "errorType": "InvalidHGVS",
        "hgvs": "NM_0002:c.2T>C",
        "message": "The HGVS string is invalid.",
    }
    transcript_registration = {
        "@id": "http://reg.test.genome.network/allele/CA789",
        "genomicAlleles": [],
        "transcriptAlleles": [{"hgvs": "NM_0003:c.3G>A"}],
    }

    content_submissions = ["NM_0001:c.1A>G", "NM_0002:c.2T>C", "NM_0003:c.3G>A"]
    submission_response = [genomic_registration, errored_registration, transcript_registration]

    expected_associations = {"NM_0001:c.1A>G": "CA123", "NM_0003:c.3G>A": "CA789"}
    assert get_allele_registry_associations(content_submissions, submission_response) == expected_associations

0 commit comments

Comments
 (0)