Skip to content

Commit 5c339bf

Browse files
authored
fixes scicrunch API due to change in OAS (ITISFoundation#3668)
1 parent 842eeb1 commit 5c339bf

File tree

3 files changed

+55
-42
lines changed

3 files changed

+55
-42
lines changed

services/web/server/src/simcore_service_webserver/scicrunch/_resolver.py

Lines changed: 23 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,10 @@
66

77
import logging
88
from datetime import datetime
9-
from typing import Any, Dict, List, Optional
9+
from typing import Any
1010

1111
from aiohttp import ClientSession
12-
from pydantic import Field
12+
from pydantic import Field, ValidationError
1313
from pydantic.main import BaseModel
1414
from pydantic.types import NonNegativeInt
1515

@@ -39,7 +39,7 @@ class HitSource(BaseModel):
3939
item: ItemInfo
4040
rrid: RRIDInfo
4141

42-
def flatten_dict(self) -> Dict[str, Any]:
42+
def flatten_dict(self) -> dict[str, Any]:
4343
"""Used as an output"""
4444
return {**self.item.dict(), **self.rrid.dict()}
4545

@@ -50,7 +50,7 @@ class HitDetail(BaseModel):
5050

5151
class Hits(BaseModel):
5252
total: NonNegativeInt
53-
hits: List[HitDetail]
53+
hits: list[HitDetail]
5454

5555

5656
class ResolverInfo(BaseModel):
@@ -77,10 +77,12 @@ class ResolvedItem(BaseModel):
7777

7878

7979
async def resolve_rrid(
80-
identifier: str, client: ClientSession, settings: SciCrunchSettings
81-
) -> Optional[ResolvedItem]:
80+
identifier: str,
81+
client: ClientSession,
82+
settings: SciCrunchSettings,
83+
) -> list[ResolvedItem]:
8284
"""
83-
Provides a API to access to results as provided by this web https://scicrunch.org/resolver
85+
API to access the results of https://scicrunch.org/resolver
8486
8587
"""
8688
# Example https://scicrunch.org/resolver/RRID:AB_90755.json
@@ -95,25 +97,24 @@ async def resolve_rrid(
9597
if resolved.hits.total == 0:
9698
return None
9799

98-
# FIXME: Not sure why the same RRID can have multiple hits.
100+
# WARNING: Not sure why the same RRID can have multiple hits.
99101
# We have experienced that the order of hits is not preserved and
100102
# therefore selecting the first hit is not the right way to go ...
101103
#
102-
# WARNING: Since Sep.2021, hits returned by resolver does not guarantee order.
104+
# WARNING: scicrunch API has been changing:
105+
# - Since Sep.2021, the order of hits returned by the resolver is NOT guaranteed.
103106
# For instance, https://scicrunch.org/resolver/RRID:CVCL_0033.json changes
104107
# the order every call and the first hit flips between
105108
# '(BCRJ Cat# 0226, RRID:CVCL_0033)' and '(ATCC Cat# HTB-30, RRID:CVCL_0033)'
109+
# - Since Dec.2022 hits returned by https://scicrunch.org/resolver/RRID:AB_90755.json
110+
# also change order, flipping between '(Sigma-Aldrich Cat# AB1542, RRID:AB_90755)' and
111+
# '(Millipore Cat# AB1542, RRID:AB_90755)'. Decided to return all hits.
106112
#
107-
hit = resolved.hits.hits[0].source
108-
109-
if resolved.hits.total > 1:
110-
logger.warning(
111-
"Multiple hits (%d) for '%s'. Returning first",
112-
resolved.hits.total,
113-
identifier,
114-
)
115-
else:
116-
assert resolved.hits.total == 1 # nosec
117-
118-
output = ResolvedItem.parse_obj(hit.flatten_dict())
119-
return output
113+
items = []
114+
for hit in resolved.hits.hits:
115+
try:
116+
items.append(ResolvedItem.parse_obj(hit.source.flatten_dict()))
117+
except ValidationError as err:
118+
logger.warning("Skipping unexpected response %s: %s", url, err)
119+
120+
return items

services/web/server/src/simcore_service_webserver/scicrunch/service_client.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
import asyncio
88
import logging
9-
from typing import Any, List, MutableMapping, Optional
9+
from typing import Any, MutableMapping
1010

1111
from aiohttp import ClientSession, client_exceptions
1212
from pydantic import ValidationError
@@ -125,12 +125,17 @@ async def get_resource_fields(self, rrid: str) -> ResearchResource:
125125
# NOTE: replaces former call to API.
126126
# Resolver entrypoint does NOT require authentication
127127
# and has an associated website
128-
resolved: Optional[ResolvedItem] = await resolve_rrid(
128+
resolved_items: list[ResolvedItem] = await resolve_rrid(
129129
rrid, self.client, self.settings
130130
)
131-
if not resolved:
131+
if not resolved_items:
132132
raise InvalidRRID(f".Could not resolve {rrid}")
133133

134+
# WARNING: currently we only take the first, but it might
135+
# have multiple hits. Nonetheless, test_scicrunch_resolves_all_valid_rrids
136+
# checks them all
137+
resolved = resolved_items[0]
138+
134139
return ResearchResource(
135140
rrid=rrid,
136141
name=resolved.name,
@@ -154,7 +159,7 @@ async def get_resource_fields(self, rrid: str) -> ResearchResource:
154159
# https://docs.aiohttp.org/en/stable/client_reference.html#hierarchy-of-exceptions
155160
raise ScicrunchServiceError("Failed to connect scicrunch service") from err
156161

157-
async def search_resource(self, name_as: str) -> List[ResourceHit]:
162+
async def search_resource(self, name_as: str) -> list[ResourceHit]:
158163
# Safe: returns empty string if fails!
159164
# Might be slow and timeout!
160165
# Might be good to know that scicrunch.org is not reachable and cannot perform search now?

services/web/server/tests/integration/02/scicrunch/test_scicrunch__resolver.py

Lines changed: 23 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,7 @@
66
import pytest
77
from aiohttp import ClientSession
88
from aiohttp.client import ClientTimeout
9-
10-
# FIXME: PC check the CELL_LINE_CITATIONS test please
11-
from pytest_simcore.helpers.utils_scrunch_citations import ( # CELL_LINE_CITATIONS,
9+
from pytest_simcore.helpers.utils_scrunch_citations import (
1210
ANTIBODY_CITATIONS,
1311
ORGANISM_CITATIONS,
1412
PLAMID_CITATIONS,
@@ -20,8 +18,7 @@
2018

2119
@pytest.mark.parametrize(
2220
"name,rrid",
23-
TOOL_CITATIONS + ANTIBODY_CITATIONS + PLAMID_CITATIONS + ORGANISM_CITATIONS
24-
# + CELL_LINE_CITATIONS, PC: this one fails
21+
TOOL_CITATIONS + ANTIBODY_CITATIONS + PLAMID_CITATIONS + ORGANISM_CITATIONS,
2522
)
2623
async def test_scicrunch_resolves_all_valid_rrids(
2724
name: str, rrid: str, settings: SciCrunchSettings
@@ -32,21 +29,27 @@ async def test_scicrunch_resolves_all_valid_rrids(
3229
# This test checks some of the structure "deduced" from the responses so far.
3330

3431
async with ClientSession(timeout=ClientTimeout(total=30)) as client:
35-
resolved = await resolve_rrid(identifier=rrid, client=client, settings=settings)
32+
resolved_items: list[ResolvedItem] = await resolve_rrid(
33+
identifier=rrid, client=client, settings=settings
34+
)
3635

37-
assert resolved
38-
assert isinstance(resolved, ResolvedItem)
36+
for resolved in resolved_items:
37+
assert resolved
38+
assert isinstance(resolved, ResolvedItem)
3939

40-
if resolved.is_unique and name:
41-
assert name in resolved.proper_citation
40+
if resolved.is_unique and name:
41+
assert name in resolved.proper_citation
4242

43-
assert rrid in resolved.proper_citation
43+
assert rrid in resolved.proper_citation
4444

4545
# NOTE: proper_citation does not seem to have a standard format.
4646
# So far I found four different formats!! :-o
4747
if not name:
4848
# only rrid with a prefix
49-
assert resolved.proper_citation == f"RRID:{rrid}"
49+
assert any(
50+
resolved.proper_citation == f"RRID:{rrid}"
51+
for resolved in resolved_items
52+
)
5053
else:
5154
# proper_citation includes both 'name' and 'rrid' but in different formats!
5255

@@ -58,8 +61,12 @@ async def test_scicrunch_resolves_all_valid_rrids(
5861
# of the reference in CELL_LINE_CITATIONS
5962
#
6063

61-
assert resolved.proper_citation in (
62-
f"({name}, RRID:{rrid})",
63-
f"({name},RRID:{rrid})",
64-
f"{name} (RRID:{rrid})",
64+
assert any(
65+
resolved.proper_citation
66+
in (
67+
f"({name}, RRID:{rrid})",
68+
f"({name},RRID:{rrid})",
69+
f"{name} (RRID:{rrid})",
70+
)
71+
for resolved in resolved_items
6572
)

0 commit comments

Comments
 (0)