Skip to content

Commit bbf9d69

Browse files
authored
Merge pull request #266 from NOAA-GSL/bug/265-normalize-item-dict-conversion
fix(api_search): extract name from nested dict instead of repr-string
2 parents e4d5fb9 + cd29b0c commit bbf9d69

File tree

2 files changed

+130
-5
lines changed

2 files changed

+130
-5
lines changed

src/zyra/connectors/discovery/api_search.py

Lines changed: 52 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,47 @@ def _parse_json_body(arg: str | None) -> Any | None:
299299
return None
300300

301301

302+
def _extract_name(value: Any) -> str | None:
303+
"""Extract a human-readable string from *value*.
304+
305+
Used to normalize heterogeneous fields (names, descriptions, links)
306+
into a single string representation.
307+
308+
- ``str`` → returned as-is.
309+
- ``dict`` → drills into common label-like keys (``name``, ``title``,
310+
``id``, ``path``, ``label``, ``url``, ``href``, ``uri``, ``link``)
311+
and returns the first value that is not ``None`` and not the empty
312+
string, as a string. Nested dicts are handled recursively.
313+
- Other types → converted via ``str()`` as a last resort.
314+
- ``None`` → returns ``None``.
315+
"""
316+
if value is None:
317+
return None
318+
if isinstance(value, str):
319+
return value
320+
if isinstance(value, dict):
321+
for key in (
322+
"name",
323+
"title",
324+
"id",
325+
"path",
326+
"label",
327+
"url",
328+
"href",
329+
"uri",
330+
"link",
331+
):
332+
v = value.get(key)
333+
if v is not None and v != "":
334+
if isinstance(v, str):
335+
return v
336+
if isinstance(v, dict):
337+
return _extract_name(v)
338+
return str(v)
339+
# No recognizable sub-key; fall through to str()
340+
return str(value)
341+
342+
302343
def _normalize_item(item: dict[str, Any], source_host: str) -> dict[str, Any]:
303344
"""Map remote item to unified row schema.
304345
@@ -310,11 +351,17 @@ def _normalize_item(item: dict[str, Any], source_host: str) -> dict[str, Any]:
310351
item.get("name") or item.get("title") or item.get("dataset") or item.get("id")
311352
)
312353
desc = item.get("description") or item.get("abstract") or None
313-
link = item.get("uri") or item.get("link") or item.get("url") or None
314-
# Strings only
315-
name_s = str(name) if name is not None else None
316-
desc_s = str(desc) if desc is not None else None
317-
link_s = str(link) if link is not None else None
354+
link = (
355+
item.get("uri")
356+
or item.get("link")
357+
or item.get("href")
358+
or item.get("url")
359+
or None
360+
)
361+
# Strings only — drill into dicts when needed
362+
name_s = _extract_name(name)
363+
desc_s = _extract_name(desc)
364+
link_s = _extract_name(link)
318365
return {
319366
"source": source_host,
320367
"dataset": name_s or "",

tests/connectors/test_search_api.py

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -392,3 +392,81 @@ def test_cli_api_openapi_diagnostics(monkeypatch, capsys):
392392
assert rc == 0
393393
out = capsys.readouterr().out
394394
assert "suggest --param for: q,limit" in out
395+
396+
397+
def test_extract_name_string_passthrough():
    """Scalars: a string passes through, ``None`` stays ``None``, an int is stringified."""
    from zyra.connectors.discovery import api_search

    extract = api_search._extract_name

    assert extract(None) is None
    assert extract("hello") == "hello"
    assert extract(42) == "42"
403+
404+
405+
def test_extract_name_drills_into_dict():
    """Dict inputs resolve to the first label-like key, recursing into nested dicts."""
    from zyra.connectors.discovery import api_search

    extract = api_search._extract_name

    cases = [
        # String values under label-like keys; ``name`` wins over ``path``
        ({"name": "Synoptic-UAS", "path": "data/Synoptic-UAS"}, "Synoptic-UAS"),
        ({"path": "data/Synoptic-UAS"}, "data/Synoptic-UAS"),
        ({"title": "My Title"}, "My Title"),
        ({"id": "abc123"}, "abc123"),
        ({"label": "Atmospheric CO2"}, "Atmospheric CO2"),
        # Non-string values in name-like keys should be stringified
        ({"id": 123}, "123"),
        ({"name": 42}, "42"),
        # Nested dict in a name-like key is recursively drilled into
        ({"name": {"id": "x"}}, "x"),
        ({"name": {"title": "inner", "id": "y"}}, "inner"),
    ]
    for payload, expected in cases:
        assert extract(payload) == expected
422+
423+
424+
def test_normalize_item_nested_dataset_dict():
    """A dict-valued ``dataset`` field is reduced to its ``name`` entry."""
    from zyra.connectors.discovery import api_search

    nested_dataset = {"path": "data/Synoptic-UAS", "name": "Synoptic-UAS"}
    normalized = api_search._normalize_item(
        {"dataset": nested_dataset, "score": 1},
        "example.com",
    )

    assert normalized["dataset"] == "Synoptic-UAS"
    assert normalized["source"] == "example.com"
434+
435+
436+
def test_normalize_item_nested_dataset_dict_no_name():
    """With only ``path`` in the nested ``dataset`` dict, the path is used."""
    from zyra.connectors.discovery import api_search

    normalized = api_search._normalize_item({"dataset": {"path": "data/foo"}}, "host")

    assert normalized["dataset"] == "data/foo"
442+
443+
444+
def test_normalize_item_nested_description_and_link():
    """Dict-valued ``description`` and ``uri`` fields are flattened to strings."""
    from zyra.connectors.discovery import api_search

    payload = {
        "name": "DS1",
        "description": {"title": "A long description object"},
        "uri": {"path": "http://example.com/ds1"},
    }
    normalized = api_search._normalize_item(payload, "host")

    expected = {
        "dataset": "DS1",
        "description": "A long description object",
        "link": "http://example.com/ds1",
    }
    for column, want in expected.items():
        assert normalized[column] == want
456+
457+
458+
def test_extract_name_url_like_dict_keys():
    """Each URL-like key (``url``, ``href``, ``uri``, ``link``) is drilled into."""
    from zyra.connectors.discovery import api_search

    extract = api_search._extract_name

    url_by_key = {
        "url": "http://x/data",
        "href": "http://x/ref",
        "uri": "http://x/uri",
        "link": "http://x/link",
    }
    for key, target in url_by_key.items():
        assert extract({key: target}) == target
465+
466+
467+
def test_normalize_item_href_link():
    """A top-level ``href`` field populates the row's ``link`` column."""
    from zyra.connectors.discovery import api_search

    payload = {"name": "DS", "href": "http://example.com/ds"}
    normalized = api_search._normalize_item(payload, "host")

    assert normalized["link"] == "http://example.com/ds"

0 commit comments

Comments
 (0)