Skip to content

Commit ca2687b

Browse files
committed
DISCO-3202 - Enrich the curated-recommendations endpoint with icon-urls
1 parent 2c2e9b1 commit ca2687b

File tree

9 files changed

+212
-50
lines changed

9 files changed

+212
-50
lines changed

merino/curated_recommendations/protocol.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ class CuratedRecommendation(CorpusItem):
8585
__typename: TypeName = TypeName.RECOMMENDATION
8686
tileId: Annotated[int, Field(strict=True, ge=MIN_TILE_ID, le=MAX_TILE_ID)]
8787
receivedRank: int
88+
icon: str | None = None
8889

8990
@model_validator(mode="before")
9091
def set_tileId(cls, values):

merino/providers/manifest/provider.py

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
import asyncio
44
import time
55
import logging
6+
from urllib.parse import urlparse
7+
8+
from pydantic import HttpUrl
69

710
from merino.providers.manifest.backends.filemanager import GetManifestResultCode
811
from merino.utils import cron
@@ -20,6 +23,7 @@ class Provider:
2023
"""Provide access to in-memory manifest data fetched from GCS."""
2124

2225
manifest_data: ManifestData | None
26+
lookup_table: dict[str, int]
2327
cron_task: asyncio.Task
2428
resync_interval_sec: int
2529
cron_interval_sec: int
@@ -39,6 +43,7 @@ def __init__(
3943
self.cron_interval_sec = cron_interval_sec
4044
self.last_fetch_at = 0.0
4145
self.manifest_data = ManifestData(domains=[])
46+
self.lookup_table = {}
4247
self.data_fetched_event = asyncio.Event()
4348

4449
super().__init__()
@@ -63,8 +68,12 @@ async def _fetch_data(self) -> None:
6368
result_code, data = await self.backend.fetch()
6469

6570
match GetManifestResultCode(result_code):
66-
case GetManifestResultCode.SUCCESS:
71+
case GetManifestResultCode.SUCCESS if data is not None:
6772
self.manifest_data = data
73+
self.lookup_table = {
74+
domain.domain: idx for idx, domain in enumerate(data.domains)
75+
}
76+
logger.info(f"Built lookup table: {self.lookup_table}")
6877
self.last_fetch_at = time.time()
6978

7079
case GetManifestResultCode.SKIP:
@@ -86,3 +95,23 @@ def _should_fetch(self) -> bool:
8695
def get_manifest_data(self) -> ManifestData | None:
8796
"""Return manifest data"""
8897
return self.manifest_data
98+
99+
def get_icon_url(self, url: str | HttpUrl) -> str | None:
100+
"""Get icon URL for a domain.
101+
102+
Args:
103+
url: Full URL to extract domain from (string or HttpUrl)
104+
105+
Returns:
106+
Icon URL if found, None otherwise
107+
"""
108+
try:
109+
url_str = str(url)
110+
domain = urlparse(url_str).netloc.replace("www.", "")
111+
idx = self.lookup_table.get(domain)
112+
if idx is not None and self.manifest_data is not None:
113+
return self.manifest_data.domains[idx].icon
114+
115+
except Exception as e:
116+
logger.warning(f"Error getting icon for URL {url}: {e}")
117+
return None

merino/web/api_v1.py

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
)
4040

4141
from merino.web.models_v1 import ProviderResponse, SuggestResponse
42-
from merino.providers.manifest.provider import Provider as ManifestProvider
42+
from merino.providers.manifest.provider import Provider
4343

4444
logger = logging.getLogger(__name__)
4545
router = APIRouter()
@@ -293,6 +293,7 @@ async def providers(
293293
async def curated_content(
294294
curated_recommendations_request: CuratedRecommendationsRequest,
295295
provider: CuratedRecommendationsProvider = Depends(get_corpus_api_provider),
296+
manifest_provider: Provider = Depends(get_manifest_provider),
296297
) -> CuratedRecommendationsResponse:
297298
"""Query Merino for curated recommendations.
298299
@@ -320,7 +321,28 @@ async def curated_content(
320321
321322
[curated-topics-doc]: https://mozilla-hub.atlassian.net/wiki/x/LQDaMg
322323
"""
323-
return await provider.fetch(curated_recommendations_request)
324+
response = await provider.fetch(curated_recommendations_request)
325+
326+
# Add icons to recommendations
327+
for rec in response.data:
328+
if rec.url and (icon_url := manifest_provider.get_icon_url(rec.url)):
329+
rec.icon = icon_url
330+
331+
# Add icons to feeds if they exist
332+
if response.feeds:
333+
if response.feeds.need_to_know:
334+
for rec in response.feeds.need_to_know.recommendations:
335+
if rec.url and (icon_url := manifest_provider.get_icon_url(rec.url)):
336+
rec.icon = icon_url
337+
338+
for section_name in response.feeds.model_fields_set:
339+
if section := getattr(response.feeds, section_name):
340+
if hasattr(section, "recommendations"):
341+
for rec in section.recommendations:
342+
if rec.url and (icon_url := manifest_provider.get_icon_url(rec.url)):
343+
rec.icon = icon_url
344+
345+
return response
324346

325347

326348
@router.get(
@@ -330,7 +352,7 @@ async def curated_content(
330352
response_model=ManifestData,
331353
)
332354
async def get_manifest(
333-
request: Request, provider: ManifestProvider = Depends(get_manifest_provider)
355+
request: Request, provider: Provider = Depends(get_manifest_provider)
334356
) -> ORJSONResponse:
335357
"""Query merino for manifest data."""
336358
logger.info("Attempting to get manifest")

tests/integration/api/conftest.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,16 @@
66

77
from logging import LogRecord
88
from typing import Iterator, Generator
9+
from unittest.mock import AsyncMock
910

1011
import pytest
1112
import orjson
1213
from starlette.testclient import TestClient
1314
from aiodogstatsd import Client as AioDogstatsdClient
15+
16+
from merino.providers.manifest import Provider
17+
from merino.providers.manifest.backends.manifest import ManifestBackend
18+
from merino.providers.manifest.backends.protocol import GetManifestResultCode, ManifestData
1419
from merino.utils.gcs.gcs_uploader import GcsUploader
1520
from contextlib import nullcontext
1621
from merino.curated_recommendations.fakespot_backend.protocol import (
@@ -134,3 +139,42 @@ def fakespot_feed() -> FakespotFeed:
134139
footerCopy=FAKESPOT_FOOTER_COPY,
135140
cta=FakespotCTA(ctaCopy=FAKESPOT_CTA_COPY, url=FAKESPOT_CTA_URL),
136141
)
142+
143+
144+
@pytest.fixture
145+
def mock_manifest():
146+
"""Mock manifest data with a known icon URL."""
147+
return {
148+
"domains": [
149+
{
150+
"rank": 1,
151+
"domain": "spotify.com",
152+
"categories": ["Entertainment"],
153+
"serp_categories": [0],
154+
"url": "https://www.spotify.com",
155+
"title": "Spotify",
156+
"icon": "https://test.com/spotify-favicon.ico",
157+
}
158+
]
159+
}
160+
161+
162+
@pytest.fixture
163+
def mock_manifest_backend(mock_manifest):
164+
"""Mock ManifestBackend that returns our test data."""
165+
backend = ManifestBackend()
166+
backend.fetch = AsyncMock(
167+
return_value=(GetManifestResultCode.SUCCESS, ManifestData(**mock_manifest))
168+
)
169+
return backend
170+
171+
172+
@pytest.fixture
173+
def manifest_provider(mock_manifest_backend):
174+
"""Override the manifest provider fixture with our mocked data."""
175+
provider = Provider(
176+
backend=mock_manifest_backend,
177+
resync_interval_sec=86400,
178+
cron_interval_sec=3600,
179+
)
180+
return provider

tests/integration/api/v1/curated_recommendations/test_curated_recommendations.py

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@
4646
)
4747
from merino.curated_recommendations.protocol import CuratedRecommendation
4848
from merino.main import app
49+
from merino.providers.manifest import get_provider as get_manifest_provider
50+
from merino.providers.manifest.backends.protocol import Domain
4951
from tests.integration.api.conftest import fakespot_feed
5052

5153

@@ -121,6 +123,16 @@ def extended_expiration_corpus_backend(
121123
)
122124

123125

126+
@pytest.fixture(autouse=True)
127+
def setup_manifest_provider(manifest_provider):
128+
"""Set up the manifest provider dependency"""
129+
app.dependency_overrides[get_manifest_provider] = lambda: manifest_provider
130+
yield
131+
# Clean up after test
132+
if get_manifest_provider in app.dependency_overrides:
133+
del app.dependency_overrides[get_manifest_provider]
134+
135+
124136
@pytest.fixture(name="corpus_provider")
125137
def provider(
126138
corpus_backend: CorpusApiBackend,
@@ -1529,3 +1541,69 @@ def mock_post_by_days_ago(*args, **kwargs):
15291541
else:
15301542
# Check that only today's items are returned if in control or not in the experiment.
15311543
assert set(days_ago_counter.keys()) == {0}
1544+
1545+
1546+
@pytest.mark.asyncio
1547+
async def test_curated_recommendations_enriched_with_icons(
1548+
manifest_provider,
1549+
corpus_http_client,
1550+
fixture_request_data,
1551+
):
1552+
"""Test the enrichment of a curated recommendation with an added icon-url."""
1553+
manifest_provider.manifest_data.domains = [
1554+
Domain(
1555+
rank=2,
1556+
title="Microsoft – AI, Cloud, Productivity, Computing, Gaming & Apps",
1557+
url="https://www.microsoft.com",
1558+
domain="microsoft.com",
1559+
icon="https://merino-images.services.mozilla.com/favicons/microsoft-icon.png",
1560+
categories=["Business", "Information Technology"],
1561+
serp_categories=[0],
1562+
)
1563+
]
1564+
1565+
manifest_provider.lookup_table = {"microsoft.com": 0}
1566+
1567+
mocked_response = {
1568+
"data": {
1569+
"scheduledSurface": {
1570+
"items": [
1571+
{
1572+
"id": "scheduledSurfaceItemId-ABC",
1573+
"corpusItem": {
1574+
"id": "corpusItemId-XYZ",
1575+
"url": "https://www.microsoft.com/some-article?utm_source=firefox-newtab-en-us",
1576+
"title": "Some MS Article",
1577+
"excerpt": "All about Microsoft something",
1578+
"topic": "tech",
1579+
"publisher": "ExamplePublisher",
1580+
"isTimeSensitive": False,
1581+
"imageUrl": "https://somewhere.com/test.jpg",
1582+
},
1583+
}
1584+
]
1585+
}
1586+
}
1587+
}
1588+
corpus_http_client.post.return_value = Response(
1589+
status_code=200,
1590+
json=mocked_response,
1591+
request=fixture_request_data,
1592+
)
1593+
1594+
async with AsyncClient(app=app, base_url="http://test") as ac:
1595+
response = await ac.post(
1596+
"/api/v1/curated-recommendations",
1597+
json={"locale": "en-US"},
1598+
)
1599+
assert response.status_code == 200
1600+
1601+
data = response.json()
1602+
items = data["data"]
1603+
assert len(items) == 1
1604+
1605+
item = items[0]
1606+
assert item["url"] == "https://www.microsoft.com/some-article?utm_source=firefox-newtab-en-us"
1607+
1608+
assert "icon" in item
1609+
assert item["icon"] == "https://merino-images.services.mozilla.com/favicons/microsoft-icon.png"

tests/integration/api/v1/manifest/test_manifest.py

Lines changed: 3 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -11,24 +11,6 @@
1111
from merino.web.api_v1 import router
1212

1313

14-
@pytest.fixture(scope="module")
15-
def mock_manifest_2025() -> dict:
16-
"""Mock manifest json for the year 2025"""
17-
return {
18-
"domains": [
19-
{
20-
"rank": 1,
21-
"domain": "spotify",
22-
"categories": ["Entertainment"],
23-
"serp_categories": [0],
24-
"url": "https://www.spotify.com",
25-
"title": "Spotify",
26-
"icon": "",
27-
}
28-
]
29-
}
30-
31-
3214
@pytest.fixture(autouse=True, name="cleanup")
3315
def cleanup_tasks_fixture():
3416
"""Return a method that cleans up existing cron tasks after initialization"""
@@ -46,9 +28,7 @@ async def cleanup_tasks(provider: Provider):
4628

4729

4830
@pytest.mark.asyncio
49-
async def test_get_manifest_success(
50-
client_with_metrics, gcp_uploader, mock_manifest_2025, cleanup
51-
):
31+
async def test_get_manifest_success(client_with_metrics, gcp_uploader, mock_manifest, cleanup):
5232
"""Uploads a manifest to the gcs bucket and verifies that the endpoint returns the uploaded file."""
5333
# initialize provider on startup
5434
await init_provider()
@@ -57,7 +37,7 @@ async def test_get_manifest_success(
5737
app.include_router(router, prefix="/api/v1")
5838

5939
# upload a manifest file to GCS test container
60-
gcp_uploader.upload_content(orjson.dumps(mock_manifest_2025), "top_picks_latest.json")
40+
gcp_uploader.upload_content(orjson.dumps(mock_manifest), "top_picks_latest.json")
6141

6242
provider = get_provider()
6343
await provider.data_fetched_event.wait()
@@ -69,7 +49,7 @@ async def test_get_manifest_success(
6949

7050
manifest = ManifestData(**response.json())
7151
assert len(manifest.domains) == 1
72-
assert manifest.domains[0].domain == "spotify"
52+
assert manifest.domains[0].domain == "spotify.com"
7353

7454
assert "Cache-Control" in response.headers
7555

tests/unit/conftest.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from pytest_mock import MockerFixture
1414

1515
from merino.middleware.geolocation import Location
16+
from merino.providers.manifest.backends.protocol import ManifestData
1617
from merino.providers.suggest.base import SuggestionRequest
1718
from tests.unit.types import SuggestionRequestFixture
1819
from google.cloud.storage import Blob, Bucket, Client
@@ -102,3 +103,9 @@ def fixture_blob_json() -> str:
102103
]
103104
}
104105
)
106+
107+
108+
@pytest.fixture(name="manifest_data")
109+
def fixture_manifest_data(blob_json: str) -> ManifestData:
110+
"""Return parsed ManifestData object for testing."""
111+
return ManifestData.model_validate_json(blob_json)

tests/unit/providers/manifest/test_init_provider.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,7 @@
77
from unittest.mock import patch
88
import pytest
99

10-
from merino.providers.manifest import get_provider, init_provider
11-
from merino.providers.manifest.provider import Provider as ManifestProvider
10+
from merino.providers.manifest import get_provider, init_provider, Provider
1211

1312

1413
@pytest.mark.asyncio
@@ -19,7 +18,7 @@ async def test_init_provider() -> None:
1918
from merino.providers.manifest import provider
2019

2120
assert provider is not None
22-
assert isinstance(provider, ManifestProvider)
21+
assert isinstance(provider, Provider)
2322
assert provider.name == "manifest"
2423

2524

0 commit comments

Comments
 (0)