1212
1313from merino .optimizers .models import EngagementMetrics , ThompsonCandidate
1414from merino .optimizers .thompson import ThompsonSampler
15- from merino .providers .suggest .adm .backends .protocol import FormFactor
15+ from merino .providers .suggest .adm .backends .protocol import EngagementData , FormFactor
16+ from merino .utils .gcs .engagement .filemanager import EngagementFilemanager
1617from merino .utils import cron
1718from merino .providers .suggest .adm .backends .protocol import AdmBackend , SuggestionContent
18- from merino .providers .suggest .base import BaseProvider , BaseSuggestion , SuggestionRequest
19+ from merino .providers .suggest .base import (
20+ BaseProvider ,
21+ BaseSuggestion ,
22+ SuggestionRequest ,
23+ )
1924
2025logger = logging .getLogger (__name__ )
2126
@@ -82,6 +87,11 @@ class Provider(BaseProvider):
8287 resync_interval_sec : float
8388 min_attempted_count : int
8489 thompson : ThompsonSampler | None = None
90+ engagement_data : EngagementData
91+ filemanager : EngagementFilemanager
92+ engagement_resync_interval_sec : float
93+ last_engagement_fetch_at : float
94+ engagement_cron_task : asyncio .Task
8595
8696 def __init__ (
8797 self ,
@@ -90,6 +100,9 @@ def __init__(
90100 name : str ,
91101 resync_interval_sec : float ,
92102 cron_interval_sec : float ,
103+ engagement_gcs_bucket : str ,
104+ engagement_blob_name : str ,
105+ engagement_resync_interval_sec : float ,
93106 enabled_by_default : bool = True ,
94107 min_attempted_count : int = 0 ,
95108 thompson : ThompsonSampler | None = None ,
@@ -105,6 +118,13 @@ def __init__(
105118 self ._enabled_by_default = enabled_by_default
106119 self .min_attempted_count = min_attempted_count
107120 self .thompson = thompson
121+ self .engagement_data = EngagementData (amp = {}, amp_aggregated = {})
122+ self .engagement_resync_interval_sec = engagement_resync_interval_sec
123+ self .last_engagement_fetch_at = 0
124+ self .filemanager = EngagementFilemanager (
125+ gcs_bucket_path = engagement_gcs_bucket ,
126+ blob_name = engagement_blob_name ,
127+ )
108128 super ().__init__ (** kwargs )
109129
110130 async def initialize (self ) -> None :
@@ -132,15 +152,46 @@ async def initialize(self) -> None:
132152 # reference to it.
133153 self .cron_task = asyncio .create_task (cron_job ())
134154
155+ engagement_cron_job = cron .Job (
156+ name = "resync_engagement_data" ,
157+ interval = self .cron_interval_sec ,
158+ condition = self ._should_fetch_engagement ,
159+ task = self ._fetch_engagement_data ,
160+ )
161+ self .engagement_cron_task = asyncio .create_task (engagement_cron_job ())
162+
def _should_fetch(self) -> bool:
    """Report whether a Remote Settings resync is due.

    Returns:
        True when the wall-clock time elapsed since `last_fetch_at` is at
        least `resync_interval_sec` seconds, otherwise False.
    """
    elapsed_sec = time.time() - self.last_fetch_at
    return elapsed_sec >= self.resync_interval_sec
138166
def _should_fetch_engagement(self) -> bool:
    """Report whether an engagement data resync from GCS is due.

    Returns:
        True when the wall-clock time elapsed since
        `last_engagement_fetch_at` is at least
        `engagement_resync_interval_sec` seconds, otherwise False.
    """
    elapsed_sec = time.time() - self.last_engagement_fetch_at
    return elapsed_sec >= self.engagement_resync_interval_sec
170+
async def _fetch(self) -> None:
    """Refresh suggestions, keywords, and icons from Remote Settings.

    Awaits `self.backend.fetch()`, stores the result in
    `suggestion_content`, and then records the current wall-clock time in
    `last_fetch_at`. If the backend call raises, neither attribute is
    modified and the exception propagates to the caller.
    """
    fetched_content = await self.backend.fetch()
    self.suggestion_content = fetched_content
    self.last_fetch_at = time.time()
143175
async def _fetch_engagement_data(self) -> None:
    """Pull engagement data from GCS and keep it in memory.

    On success the payload is re-validated as `EngagementData`, stored in
    `engagement_data`, and `last_engagement_fetch_at` is set to the current
    wall-clock time.

    When the file manager returns None, or when any exception is raised
    while fetching or validating, a warning is logged and
    `last_engagement_fetch_at` is left unchanged, so the cron job retries
    on its next tick. Exceptions are never propagated to the caller.
    """
    try:
        payload = await self.filemanager.get_file()
        if payload is not None:
            # Only mark the fetch as done once the new data is in place.
            self.engagement_data = EngagementData.model_validate(payload.model_dump())
            self.last_engagement_fetch_at = time.time()
        else:
            logger.warning("Engagement data fetch returned None, will retry on next tick")
    except Exception as exc:
        # Best-effort refresh: keep serving the previously loaded data.
        logger.warning(
            "Failed to fetch engagement data from GCS",
            extra={"error": str(exc)},
        )
194+
def hidden(self) -> bool:
    """Always return False; this provider does not report itself as hidden."""
    return False
146197
0 commit comments