11import logging
22from abc import ABC
3- from datetime import datetime , timedelta , timezone
4- from typing import Dict , Generator , List , Optional
3+ from datetime import timedelta
4+ from typing import Dict , Generator , List , Optional , Type
55
66import httpx
77from django .db import transaction
8+ from django .utils import timezone
89
910from alert_system .helpers import build_stac_search
1011from alert_system .models import Connector , ExtractionItem , LoadItem
1112
13+ from .config import ExtractionConfig
1214from .loader import BaseLoaderClass
1315from .transform import BaseTransformerClass
1416
@@ -29,19 +31,28 @@ class BaseExtractionClass(ABC):
2931 """
3032
3133 event_collection_type : str
32- hazard_collection_type : str | None
33- impact_collection_type : str | None
34+ transformer_class : Type [BaseTransformerClass ]
35+ loader_class : Type [BaseLoaderClass ]
36+
37+ hazard_collection_type : Optional [str ] = None
38+ impact_collection_type : Optional [str ] = None
39+
3440 filter_event : Optional [Dict ] = None
3541 filter_hazard : Optional [Dict ] = None
3642 filter_impact : Optional [Dict ] = None
37- transformer_class : type [ BaseTransformerClass ]
38- loader_class : type [ BaseLoaderClass ]
43+
44+ config : ExtractionConfig
3945
def __init__(self, connector: Connector):
    """Bind the connector and prepare the extractor for use.

    The trailing slash is stripped so later URL joins never produce "//".
    Config is applied before validation so attributes supplied via
    ``ExtractionConfig`` count toward the required-attribute check.
    """
    self.connector = connector
    source_url: str = connector.source_url
    self.base_url = source_url.rstrip("/")
    self.load_config()
    self._validate_required_attributes()
4451
def load_config(self):
    """Copy each entry of ``self.config`` onto the instance as an attribute.

    Subclass-provided configuration (collection types, filters, ...) is
    declared once in the config mapping and materialised here via setattr.
    """
    for attr_name, attr_value in self.config.items():
        setattr(self, attr_name, attr_value)
4556 def _validate_required_attributes (self ):
4657 missing_attr = []
4758 if not getattr (self , "event_collection_type" , None ):
@@ -96,10 +107,10 @@ def get_datetime_filter(self) -> str:
96107 ISO 8601 datetime range string
97108 """
98109
99- now = datetime .now (timezone . utc )
110+ now = timezone .now ()
100111 last_run = self .connector .last_success_run
101112
102- start_time = last_run if last_run else (now - timedelta (days = 20 )) # NOTE: Arbitrary value for failure case.
113+ start_time = last_run if last_run else (now - timedelta (days = 30 )) # NOTE: Arbitrary value for failure case.
103114 return f"{ start_time .isoformat ()} /{ now .isoformat ()} "
104115
105116 def _save_stac_item (self , stac_id : str , defaults : Dict ) -> Optional [ExtractionItem ]:
@@ -200,7 +211,7 @@ def process_event_items(self, extraction_run_id: str, correlation_id: str | None
200211 ),
201212 )
202213 except Exception as e :
203- logger .error (f"Failed to fetch events: { e } " )
214+ logger .warning (f"Failed to fetch events: { e } " )
204215 raise
205216
206217 for feature in event_items :
@@ -233,23 +244,20 @@ def process_event_items(self, extraction_run_id: str, correlation_id: str | None
233244 logger .info (f"Successfully processed event { event_id } " )
234245
235246 except Exception as e :
236- logger .error (f"Failed to process event { event_id } : { e } " , exc_info = True )
247+ logger .warning (f"Failed to process event { event_id } : { e } " , exc_info = True )
237248 raise
238249
239250 def run (self , extraction_run_id : str , correlation_id : str | None = None , is_past_event : bool = False ) -> None :
240251 """Main entry point for running the connector."""
241- logger .info (f"Starting connector run for { self .connector } " )
242-
243252 try :
244253 self .process_event_items (extraction_run_id , correlation_id , is_past_event )
245- logger .info ("Connector run completed successfully" )
246254 except Exception as e :
247- logger .error (f"Connector run failed: { e } " , exc_info = True )
255+ logger .warning (f"Connector run failed: { e } " , exc_info = True )
248256 raise
249257
250258
251259class PastEventExtractionClass :
252- LOOKBACK_WEEKS = 16
260+ LOOKBACK_WEEKS = 520
253261
254262 def __init__ (self , extractor : BaseExtractionClass ):
255263 self .extractor = extractor
@@ -268,6 +276,13 @@ def _impact_filter(self, impact_metadata: list[dict]) -> str:
268276
269277 return " OR " .join (f"({ filter } )" for filter in filters )
270278
279+ def _country_filter (self , country_codes ) -> list [str ]:
280+ filters = []
281+ if country_codes :
282+ country_cql = " OR " .join (f"a_contains(monty:country_codes, '{ code } ')" for code in country_codes )
283+ filters .append (f"({ country_cql } )" )
284+ return filters
285+
271286 def _hazard_filter (self , unit : str , value : int ) -> str :
272287 return f"monty:hazard_detail.severity_unit = '{ unit } ' AND " f"monty:hazard_detail.severity_value >= { value } "
273288
@@ -280,21 +295,31 @@ def _collect_corr_ids(self, features, exclude: str) -> set[str]:
280295 return corr_ids
281296
282297 def find_related_corr_ids (self , load_obj : LoadItem ) -> set [str ]:
283- start = datetime .now (timezone . utc ) - timedelta (weeks = self .LOOKBACK_WEEKS )
284- end = datetime .now (timezone . utc )
298+ start = timezone .now () - timedelta (weeks = self .LOOKBACK_WEEKS )
299+ end = timezone .now ()
285300
286301 corr_ids = set ()
287302
288303 if self .extractor .impact_collection_type :
289304 impact_filter = self ._impact_filter (load_obj .impact_metadata )
305+ country_filters = self ._country_filter (load_obj .country_codes )
306+
307+ additional_filters = []
308+
309+ if impact_filter :
310+ additional_filters .append (impact_filter )
311+
312+ additional_filters .extend (country_filters )
313+
290314 features = self .extractor .fetch_stac_data (
291315 self .base_url ,
292316 build_stac_search (
293317 collections = self .extractor .impact_collection_type ,
294- additional_filters = [ impact_filter ] if impact_filter else [] ,
318+ additional_filters = additional_filters ,
295319 datetime_range = f"{ start .isoformat ()} /{ end .isoformat ()} " ,
296320 ),
297321 )
322+
298323 corr_ids |= self ._collect_corr_ids (features , load_obj .correlation_id )
299324
300325 # NOTE: Returns too many correlation_ids.
0 commit comments