@@ -112,8 +112,7 @@ def __init__(
112112 self ,
113113 tap ,
114114 name : str ,
115- * ,
116- file_path : str | None ,
115+ file_path : str | None = None ,
117116 primary_keys : list [str ] | None = None ,
118117 skip_past_reference : str | None = None ,
119118 skip_past : bool | None = False ,
@@ -122,6 +121,9 @@ def __init__(
122121 bboxes : list [tuple [float , float , float , float ]] | None = None ,
123122 ** kwargs ,
124123 ):
124+
125+ super ().__init__ (tap = tap , name = name , ** kwargs )
126+
125127 self .file_path = file_path
126128 self .extra_files = extra_files or ([file_path ] if file_path else [])
127129 self .primary_keys = primary_keys or [
@@ -157,10 +159,9 @@ def __init__(
157159 raise ValueError (f"Cannot ignore core fields: { ', ' .join (sorted (invalid ))} " )
158160 self .ignore_fields = ignore_fields
159161
160- # now call parent init with only tap/name/kwargs
161- super ().__init__ (tap = tap , name = name , ** kwargs )
162+ # super().__init__(tap=tap, name=name, **kwargs)
162163
163- self .state_partitioning_keys = [SDC_FILENAME ]
164+ # self.state_partitioning_keys = [SDC_FILENAME]
164165 self .replication_key = SDC_INCREMENTAL_KEY
165166 self .forced_replication_method = "INCREMENTAL"
166167
@@ -210,12 +211,27 @@ def get_records(
210211 dict [str , t .Any ] | tuple [dict [t .Any , t .Any ], dict [t .Any , t .Any ] | None ]
211212 ]:
212213
214+ start_mtime : datetime | None = self .get_starting_timestamp (context )
215+ if start_mtime and start_mtime .tzinfo is None :
216+ start_mtime = start_mtime .replace (tzinfo = timezone .utc )
217+ elif start_mtime :
218+ start_mtime = start_mtime .astimezone (timezone .utc )
219+
213220 for path in self .extra_files :
214221 self .logger .info (f"[{ self .name } ] Streaming records from { path } " )
215222 storage = Storage (path )
216223 info = storage .describe (path )
217224 mtime = info .mtime
218225
226+ if start_mtime is not None and mtime is not None and mtime <= start_mtime :
227+ self .logger .info (
228+ "Skipping %s (mtime=%s <= bookmark=%s)" ,
229+ path ,
230+ mtime ,
231+ start_mtime ,
232+ )
233+ continue
234+
219235 filename = info .path
220236 partition_context = {SDC_FILENAME : filename }
221237 last_bookmark = self .get_starting_replication_key_value (partition_context )
@@ -273,15 +289,6 @@ def get_records(
273289 if lats .size == 0 :
274290 continue
275291
276- for msg in grbs :
277- try :
278- lats , lons , vals = _extract_grid (msg )
279- except Exception as e :
280- self .logger .warning (f"Skipping message: { e } " )
281- continue
282- if lats .size == 0 :
283- continue
284-
285292 # safe datetime extraction
286293 valid_dt = getattr (msg , "validDate" , None )
287294 if valid_dt is None :
0 commit comments