Skip to content

Commit b975b8e

Browse files
committed
DISCO-3202 - Enrich the curated-recommendations endpoint with icon-urls
1 parent 2c2e9b1 commit b975b8e

File tree

13 files changed

+270
-34
lines changed

13 files changed

+270
-34
lines changed

merino/curated_recommendations/protocol.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ class CuratedRecommendation(CorpusItem):
8585
__typename: TypeName = TypeName.RECOMMENDATION
8686
tileId: Annotated[int, Field(strict=True, ge=MIN_TILE_ID, le=MAX_TILE_ID)]
8787
receivedRank: int
88+
icon: str | None = None
8889

8990
@model_validator(mode="before")
9091
def set_tileId(cls, values):

merino/providers/manifest/__init__.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,13 @@
33
from timeit import default_timer as timer
44
import logging
55
from merino.utils import metrics
6-
from merino.providers.manifest.provider import Provider
6+
from merino.providers.manifest.provider import ManifestProvider
77
from merino.providers.manifest.backends.manifest import ManifestBackend
88
from merino.configs import settings
99

1010
logger = logging.getLogger(__name__)
1111

12-
provider: Provider | None = None
12+
provider: ManifestProvider | None = None
1313

1414

1515
async def init_provider() -> None:
@@ -20,7 +20,7 @@ async def init_provider() -> None:
2020
global provider
2121
start = timer()
2222

23-
provider = Provider(
23+
provider = ManifestProvider(
2424
backend=ManifestBackend(),
2525
resync_interval_sec=settings.manifest.resync_interval_sec,
2626
cron_interval_sec=settings.manifest.cron_interval_sec,
@@ -37,7 +37,7 @@ async def init_provider() -> None:
3737
)
3838

3939

40-
def get_provider() -> Provider:
40+
def get_provider() -> ManifestProvider:
4141
"""Return manifest provider"""
4242
if provider is None:
4343
raise ValueError("Manifest provider has not been initialized.")

merino/providers/manifest/provider.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,13 @@
1111
ManifestBackend,
1212
ManifestBackendError,
1313
ManifestData,
14+
Domain,
1415
)
1516

1617
logger = logging.getLogger(__name__)
1718

1819

19-
class Provider:
20+
class ManifestProvider:
2021
"""Provide access to in-memory manifest data fetched from GCS."""
2122

2223
manifest_data: ManifestData | None
@@ -86,3 +87,25 @@ def _should_fetch(self) -> bool:
8687
def get_manifest_data(self) -> ManifestData | None:
8788
"""Return manifest data"""
8889
return self.manifest_data
90+
91+
def get_domain_info(self, domain: str) -> Domain | None:
    """Look up the manifest entry whose ``domain`` equals the given name.

    The lookup key is lower-cased and stripped of surrounding whitespace
    before it is compared against each entry's ``domain`` attribute.

    Args:
        domain: Domain name (e.g., 'google.com')

    Returns:
        The first matching Domain entry, or None when no manifest data is
        loaded, the manifest has no domains, or nothing matches.
    """
    manifest = self.manifest_data
    if not manifest or not manifest.domains:
        return None

    wanted = domain.lower().strip()

    # First entry wins, mirroring a front-to-back scan of the domain list.
    return next(
        (entry for entry in manifest.domains if entry.domain == wanted),
        None,
    )

merino/services/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
"""Services package for Merino application."""
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
"""Protocol for icon enrichment service."""
2+
3+
from typing import Protocol
4+
from merino.curated_recommendations.protocol import CuratedRecommendationsResponse
5+
6+
7+
class IconEnricher(Protocol):
    """Protocol for the icon enricher.

    Structural interface: any object that provides ``enrich_response`` and
    ``get_icon_url`` with compatible signatures satisfies it, without
    having to inherit from this class.
    """

    def enrich_response(self, response: CuratedRecommendationsResponse) -> None:
        """Enrich response with icons.

        Args:
            response: The curated recommendations response to enrich.

        Returns:
            None. NOTE(review): since nothing is returned, implementations
            presumably mutate ``response`` in place -- confirm against the
            concrete implementation.
        """
        ...

    def get_icon_url(self, url: str) -> str | None:
        """Get icon URL for a given URL.

        Args:
            url: The URL to find an icon for.

        Returns:
            The icon URL as a string, or None (presumably when no icon is
            known for the URL -- confirm against the concrete implementation).
        """
        ...
merino/services/icon_enrichment/service.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
"""A Service to retrieve domain data from the manifest provider."""
2+
3+
from urllib.parse import urlparse
4+
import logging
5+
6+
from pydantic import HttpUrl
7+
8+
from merino.providers.manifest.provider import ManifestProvider
9+
from merino.curated_recommendations.protocol import (
10+
CuratedRecommendationsResponse,
11+
CuratedRecommendation,
12+
)
13+
14+
logger = logging.getLogger(__name__)
15+
16+
17+
class IconEnrichmentService:
    """Service to enrich recommendations with icons from manifest provider."""

    def __init__(self, manifest_provider: ManifestProvider):
        """Keep a reference to the manifest provider used for domain lookups.

        Args:
            manifest_provider: Provider whose ``get_domain_info`` is queried
                for each recommendation's domain.
        """
        self.manifest_provider = manifest_provider

    def get_icon_url(self, url: HttpUrl | str) -> str | None:
        """Get icon URL for the domain of the given URL.

        Args:
            url: Full URL to extract the domain from. Anything whose ``str()``
                is a URL is accepted.

        Returns:
            Icon URL if the manifest knows the domain, None otherwise
            (including when the URL has no host or the lookup fails).
        """
        try:
            # ``hostname`` (unlike ``netloc``) excludes port and userinfo and
            # is already lower-cased, so "www.example.com:8443" still matches.
            host = urlparse(str(url)).hostname
            if not host:
                return None

            # Strip only a *leading* "www."; a blanket replace would corrupt
            # hosts such as "awww.example.com".
            domain = host.removeprefix("www.")

            if domain_info := self.manifest_provider.get_domain_info(domain):
                return domain_info.icon
        except Exception as e:
            # Icon enrichment is best-effort: a lookup failure must never
            # break the recommendations response, so log and fall through.
            logger.warning("Error getting icon for URL %s: %s", url, e)
        return None

    def enrich_response(self, response: CuratedRecommendationsResponse) -> None:
        """Enrich response with icons from manifest provider.

        Recommendations are updated in place: ``icon`` is set on every
        recommendation whose URL resolves to a manifest domain with an icon;
        all others are left untouched.

        Args:
            response: Response to enrich with icons
        """

        def add_icon(rec: CuratedRecommendation) -> None:
            if rec.url and (icon_url := self.get_icon_url(rec.url)):
                rec.icon = icon_url

        # Process main recommendations
        for rec in response.data:
            add_icon(rec)

        # Process feeds if they exist
        feeds = response.feeds
        if not feeds:
            return

        # "need_to_know" is always inspected; every other section only when it
        # was explicitly set. dict.fromkeys de-duplicates so that need_to_know
        # is not processed a second time when it is also in model_fields_set.
        section_names = dict.fromkeys(("need_to_know", *feeds.model_fields_set))
        for section_name in section_names:
            section = getattr(feeds, section_name)
            if section and hasattr(section, "recommendations"):
                for rec in section.recommendations:
                    add_icon(rec)

merino/web/api_v1.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
from merino.providers.suggest.base import BaseProvider, SuggestionRequest
2929

3030
from merino.providers.manifest.backends.protocol import ManifestData
31+
from merino.services.icon_enrichment.service import IconEnrichmentService
3132
from merino.utils import task_runner
3233

3334
from merino.utils.api.cache_control import get_ttl_for_cache_control_header_for_suggestions
@@ -39,7 +40,7 @@
3940
)
4041

4142
from merino.web.models_v1 import ProviderResponse, SuggestResponse
42-
from merino.providers.manifest.provider import Provider as ManifestProvider
43+
from merino.providers.manifest.provider import ManifestProvider
4344

4445
logger = logging.getLogger(__name__)
4546
router = APIRouter()
@@ -293,6 +294,7 @@ async def providers(
293294
async def curated_content(
294295
curated_recommendations_request: CuratedRecommendationsRequest,
295296
provider: CuratedRecommendationsProvider = Depends(get_corpus_api_provider),
297+
manifest_provider: ManifestProvider = Depends(get_manifest_provider),
296298
) -> CuratedRecommendationsResponse:
297299
"""Query Merino for curated recommendations.
298300
@@ -320,7 +322,14 @@ async def curated_content(
320322
321323
[curated-topics-doc]: https://mozilla-hub.atlassian.net/wiki/x/LQDaMg
322324
"""
323-
return await provider.fetch(curated_recommendations_request)
325+
# Get base response
326+
response = await provider.fetch(curated_recommendations_request)
327+
328+
# Enrich with icons
329+
icon_service = IconEnrichmentService(manifest_provider)
330+
icon_service.enrich_response(response)
331+
332+
return response
324333

325334

326335
@router.get(

tests/integration/api/conftest.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,16 @@
66

77
from logging import LogRecord
88
from typing import Iterator, Generator
9+
from unittest.mock import AsyncMock
910

1011
import pytest
1112
import orjson
1213
from starlette.testclient import TestClient
1314
from aiodogstatsd import Client as AioDogstatsdClient
15+
16+
from merino.providers.manifest import ManifestProvider
17+
from merino.providers.manifest.backends.manifest import ManifestBackend
18+
from merino.providers.manifest.backends.protocol import GetManifestResultCode, ManifestData
1419
from merino.utils.gcs.gcs_uploader import GcsUploader
1520
from contextlib import nullcontext
1621
from merino.curated_recommendations.fakespot_backend.protocol import (
@@ -134,3 +139,24 @@ def fakespot_feed() -> FakespotFeed:
134139
footerCopy=FAKESPOT_FOOTER_COPY,
135140
cta=FakespotCTA(ctaCopy=FAKESPOT_CTA_COPY, url=FAKESPOT_CTA_URL),
136141
)
142+
143+
144+
@pytest.fixture
def mock_manifest_backend(mock_manifest_data):
    """Provide a ManifestBackend whose ``fetch`` is stubbed with canned data.

    ``fetch`` is replaced by an AsyncMock that resolves to a
    ``(GetManifestResultCode.SUCCESS, ManifestData)`` tuple built from the
    ``mock_manifest_data`` fixture.
    """
    stub_backend = ManifestBackend()
    canned_result = (
        GetManifestResultCode.SUCCESS,
        ManifestData(**mock_manifest_data),
    )
    stub_backend.fetch = AsyncMock(return_value=canned_result)
    return stub_backend
152+
153+
154+
@pytest.fixture
def manifest_provider(mock_manifest_backend):
    """Provide a ManifestProvider wired to the stubbed manifest backend."""
    return ManifestProvider(
        backend=mock_manifest_backend,
        resync_interval_sec=86400,
        cron_interval_sec=3600,
    )

tests/integration/api/v1/curated_recommendations/test_curated_recommendations.py

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,10 @@
4646
)
4747
from merino.curated_recommendations.protocol import CuratedRecommendation
4848
from merino.main import app
49+
from merino.providers.manifest import ManifestProvider
50+
from merino.providers.manifest.backends.manifest import ManifestBackend
51+
from merino.providers.manifest import get_provider as get_manifest_provider
52+
from merino.providers.manifest.backends.protocol import Domain
4953
from tests.integration.api.conftest import fakespot_feed
5054

5155

@@ -121,6 +125,28 @@ def extended_expiration_corpus_backend(
121125
)
122126

123127

128+
@pytest.fixture
def manifest_provider():
    """Provide a ManifestProvider backed by a default ManifestBackend.

    The backend is not stubbed here; tests that need manifest entries
    assign them on the provider's ``manifest_data`` themselves.
    """
    return ManifestProvider(
        backend=ManifestBackend(),
        resync_interval_sec=86400,
        cron_interval_sec=3600,
    )
138+
139+
140+
@pytest.fixture(autouse=True)
def setup_manifest_provider(manifest_provider):
    """Route the app's manifest-provider dependency to the test fixture.

    The override is installed before each test and removed afterwards so
    it does not linger in ``app.dependency_overrides``.
    """

    def _provide_manifest():
        return manifest_provider

    app.dependency_overrides[get_manifest_provider] = _provide_manifest
    yield
    # Tolerate the override having already been removed by the test itself.
    app.dependency_overrides.pop(get_manifest_provider, None)
148+
149+
124150
@pytest.fixture(name="corpus_provider")
125151
def provider(
126152
corpus_backend: CorpusApiBackend,
@@ -1529,3 +1555,67 @@ def mock_post_by_days_ago(*args, **kwargs):
15291555
else:
15301556
# Check that only today's items are returned if in control or not in the experiment.
15311557
assert set(days_ago_counter.keys()) == {0}
1558+
1559+
1560+
@pytest.mark.asyncio
async def test_curated_recommendations_enriched_with_icons(
    manifest_provider,
    corpus_http_client,
    fixture_request_data,
):
    """Verify a recommendation gains the manifest icon URL for its domain.

    The manifest is seeded with a single microsoft.com entry and the corpus
    API is stubbed to return one article hosted on www.microsoft.com; the
    endpoint response must carry that entry's icon on the item.
    """
    article_url = "https://www.microsoft.com/some-article?utm_source=firefox-newtab-en-us"
    expected_icon = "https://merino-images.services.mozilla.com/favicons/microsoft-icon.png"

    microsoft_domain = Domain(
        rank=2,
        title="Microsoft – AI, Cloud, Productivity, Computing, Gaming & Apps",
        url="https://www.microsoft.com",
        domain="microsoft.com",
        icon=expected_icon,
        categories=["Business", "Information Technology"],
        serp_categories=[0],
    )
    manifest_provider.manifest_data.domains = [microsoft_domain]

    corpus_item = {
        "id": "corpusItemId-XYZ",
        "url": article_url,
        "title": "Some MS Article",
        "excerpt": "All about Microsoft something",
        "topic": "tech",
        "publisher": "ExamplePublisher",
        "isTimeSensitive": False,
        "imageUrl": "https://somewhere.com/test.jpg",
    }
    scheduled_item = {
        "id": "scheduledSurfaceItemId-ABC",
        "corpusItem": corpus_item,
    }
    mocked_response = {"data": {"scheduledSurface": {"items": [scheduled_item]}}}

    corpus_http_client.post.return_value = Response(
        status_code=200,
        json=mocked_response,
        request=fixture_request_data,
    )

    async with AsyncClient(app=app, base_url="http://test") as ac:
        response = await ac.post(
            "/api/v1/curated-recommendations",
            json={"locale": "en-US"},
        )
        assert response.status_code == 200

        items = response.json()["data"]
        assert len(items) == 1

        item = items[0]
        assert item["url"] == article_url

        # The icon must be present and equal to the manifest entry's icon.
        assert "icon" in item
        assert item["icon"] == expected_icon

0 commit comments

Comments
 (0)