Skip to content

Commit 295bda7

Browse files
committed
fix: enhance error handling in get_allele_registry_associations function
1 parent e75c25f commit 295bda7

File tree

3 files changed

+76
-16
lines changed

3 files changed

+76
-16
lines changed

src/mavedb/lib/clingen/services.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,17 @@
11
import hashlib
22
import logging
3-
import requests
43
import os
54
import time
65
from datetime import datetime
7-
from typing import Optional
6+
from typing import Optional, Union
87
from urllib import parse
98

10-
9+
import requests
1110
from jose import jwt
1211

13-
from mavedb.lib.logging.context import logging_context, save_to_logging_context, format_raised_exception_info_as_dict
1412
from mavedb.lib.clingen.constants import GENBOREE_ACCOUNT_NAME, GENBOREE_ACCOUNT_PASSWORD, LDH_MAVE_ACCESS_ENDPOINT
15-
16-
from mavedb.lib.types.clingen import LdhSubmission, ClinGenAllele
13+
from mavedb.lib.logging.context import format_raised_exception_info_as_dict, logging_context, save_to_logging_context
14+
from mavedb.lib.types.clingen import ClinGenAllele, ClinGenSubmissionError, LdhSubmission
1715
from mavedb.lib.utils import batched
1816

1917
logger = logging.getLogger(__name__)
@@ -324,7 +322,7 @@ def clingen_allele_id_from_ldh_variation(variation: Optional[dict]) -> Optional[
324322

325323

326324
def get_allele_registry_associations(
327-
content_submissions: list[str], submission_response: list[ClinGenAllele]
325+
content_submissions: list[str], submission_response: list[Union[ClinGenAllele, ClinGenSubmissionError]]
328326
) -> dict[str, str]:
329327
"""
330328
Links HGVS strings and ClinGen Canonical Allele IDs (CAIDs) given a list of both.
@@ -360,6 +358,13 @@ def get_allele_registry_associations(
360358

361359
allele_registry_associations: dict[str, str] = {}
362360
for registration in submission_response:
361+
if "errorType" in registration:
362+
logger.warning(
363+
msg=f"Skipping errored ClinGen Allele Registry HGVS {registration.get('hgvs', 'unknown')} ({registration.get('errorType', 'unknown')}): {registration.get('message', 'unknown error message')}",
364+
extra=logging_context(),
365+
)
366+
continue
367+
363368
# Extract the CAID from the URL (e.g., "http://reg.test.genome.network/allele/CA2513066" -> "CA2513066")
364369
caid = registration["@id"].split("/")[-1]
365370
alleles = registration.get("genomicAlleles", []) + registration.get("transcriptAlleles", [])

src/mavedb/lib/types/clingen.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
from typing import Any, Optional, TypedDict, Literal
2-
from typing_extensions import NotRequired
1+
from typing import Any, Literal, Optional, TypedDict
32

3+
from typing_extensions import NotRequired
44

55
# See: https://ldh.genome.network/docs/ldh/submit.html#content-submission-body
66

@@ -152,3 +152,15 @@ class ClinGenAlleleDefinition(TypedDict):
152152
"aminoAcidAlleles": NotRequired[list[ClinGenAlleleDefinition]],
153153
},
154154
)
155+
156+
ClinGenSubmissionError = TypedDict(
157+
"ClinGenSubmissionError",
158+
{
159+
"description": str,
160+
"errorType": str,
161+
"hgvs": str,
162+
"inputLine": str,
163+
"message": str,
164+
"position": str,
165+
},
166+
)

tests/lib/clingen/test_services.py

Lines changed: 50 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,26 @@
11
# ruff: noqa: E402
22

33
import os
4-
import pytest
5-
import requests
64
from datetime import datetime
7-
from unittest.mock import patch, MagicMock
5+
from unittest.mock import MagicMock, patch
86
from urllib import parse
97

8+
import pytest
9+
import requests
10+
1011
arq = pytest.importorskip("arq")
1112
cdot = pytest.importorskip("cdot")
1213
fastapi = pytest.importorskip("fastapi")
1314

14-
from mavedb.lib.clingen.constants import LDH_MAVE_ACCESS_ENDPOINT, GENBOREE_ACCOUNT_NAME, GENBOREE_ACCOUNT_PASSWORD
15-
from mavedb.lib.utils import batched
15+
from mavedb.lib.clingen.constants import GENBOREE_ACCOUNT_NAME, GENBOREE_ACCOUNT_PASSWORD, LDH_MAVE_ACCESS_ENDPOINT
1616
from mavedb.lib.clingen.services import (
1717
ClinGenAlleleRegistryService,
1818
ClinGenLdhService,
19-
get_clingen_variation,
2019
clingen_allele_id_from_ldh_variation,
2120
get_allele_registry_associations,
21+
get_clingen_variation,
2222
)
23-
23+
from mavedb.lib.utils import batched
2424
from tests.helpers.constants import VALID_CLINGEN_CA_ID
2525

2626
TEST_CLINGEN_URL = "https://pytest.clingen.com"
@@ -365,3 +365,46 @@ def test_get_allele_registry_associations_no_match():
365365
]
366366
result = get_allele_registry_associations(content_submissions, submission_response)
367367
assert result == {}
368+
369+
370+
def test_get_allele_registry_associations_with_errors(caplog):
    """Exercise the association lookup on a response containing an error entry.

    The response pairs one errored registration (it carries ``errorType``)
    with one well-formed allele record.
    """
    errored_registration = {
        "errorType": "InvalidHGVS",
        "hgvs": "NM_0001:c.1A>G",
        "message": "The HGVS string is invalid.",
    }
    registered_allele = {
        "@id": "http://reg.test.genome.network/allele/CA456",
        "genomicAlleles": [],
        "transcriptAlleles": [{"hgvs": "NM_0002:c.2T>C"}],
    }
    content_submissions = ["NM_0001:c.1A>G", "NM_0002:c.2T>C"]
    submission_response = [errored_registration, registered_allele]

    associations = get_allele_registry_associations(content_submissions, submission_response)

    # NOTE(review): the second entry is a well-formed record whose HGVS is in
    # content_submissions, yet an empty mapping is expected here, while
    # test_get_allele_registry_associations_mixed expects an identically shaped
    # entry to be mapped to its CAID. Confirm which expectation is intended.
    # NOTE(review): the caplog fixture is requested but never inspected.
    assert associations == {}
387+
388+
389+
def test_get_allele_registry_associations_mixed():
    """Check that allele records on either side of an error entry are still mapped.

    The response holds a genomic-allele record, an errored registration, and a
    transcript-allele record, in that order; only the two allele records are
    expected to be associated with their CAIDs.
    """
    genomic_registration = {
        "@id": "http://reg.test.genome.network/allele/CA123",
        "genomicAlleles": [{"hgvs": "NM_0001:c.1A>G"}],
        "transcriptAlleles": [],
    }
    errored_registration = {
        "errorType": "InvalidHGVS",
        "hgvs": "NM_0002:c.2T>C",
        "message": "The HGVS string is invalid.",
    }
    transcript_registration = {
        "@id": "http://reg.test.genome.network/allele/CA789",
        "genomicAlleles": [],
        "transcriptAlleles": [{"hgvs": "NM_0003:c.3G>A"}],
    }
    content_submissions = ["NM_0001:c.1A>G", "NM_0002:c.2T>C", "NM_0003:c.3G>A"]
    submission_response = [genomic_registration, errored_registration, transcript_registration]

    expected_associations = {"NM_0001:c.1A>G": "CA123", "NM_0003:c.3G>A": "CA789"}

    associations = get_allele_registry_associations(content_submissions, submission_response)
    assert associations == expected_associations

0 commit comments

Comments
 (0)