4141]
4242
4343
44- class RateLimitedHandler (FileSystemEventHandler ):
44+ class RateLimitedFilesystemEventHandler (FileSystemEventHandler ):
4545 """File system event handler with rate limiting capabilities.
4646
4747 This handler processes file system events from watchdog, specifically watching
@@ -70,7 +70,8 @@ class RateLimitedHandler(FileSystemEventHandler):
7070 datastore : EpuAcquisitionSessionStore | None = None
7171 watch_dir : Path | None = None
7272
73- def __init__ (self , patterns : list [str ], log_interval : float = 10.0 , verbose : bool = False ):
73+ # TODO test with a lower log_interval value, set lowest possible default, better naming
74+ def __init__ (self , watch_dir , dry_run : bool = False , api_url : str | None = None , verbose : bool = False , log_interval : float = 10.0 , patterns : list [str ] = DEFAULT_PATTERNS ):
7475 self .last_log_time = time .time ()
7576 self .log_interval = log_interval
7677 self .patterns = patterns
@@ -82,7 +83,10 @@ def __init__(self, patterns: list[str], log_interval: float = 10.0, verbose: boo
8283 # Maintain a buffer of "orphaned" files - files that appear to belong to a grid that doesn't exist yet
8384 self .orphaned_files = {} # path -> (event, timestamp, file_stat)
8485
85- # TODO unit test thi method
86+ self ._set_watch_dir (watch_dir )
87+ self ._init_datastore (dry_run , api_url )
88+
89+ # TODO unit test this method
8690 def matches_pattern (self , path : str ) -> bool :
8791 try :
8892 rel_path = str (Path (path ).relative_to (self .watch_dir ))
@@ -91,29 +95,28 @@ def matches_pattern(self, path: str) -> bool:
9195 except ValueError :
9296 return False
9397
94- def set_watch_dir (self , path : Path ):
95- self .watch_dir = path .absolute () # TODO this could cause problems in Win
98+ # TODO on Win there's primary and secondary output dirs - work directly with primary if possible otherwise
99+ # operate across both. Note: data is first written to primary output dir then later maybe partially copied
100+ # to secondary dir.
101+ def _set_watch_dir (self , path : Path ):
102+ self .watch_dir = path .absolute () # TODO this could cause problems in Win - test!
96103
97- def init_datastore (self , dry_run : bool = False , api_url : str = None ):
98- logging .info (f"Instantiated new datastore, dry run: { dry_run } " )
104+ def _init_datastore (self , dry_run : bool = False , api_url : str = None ):
99105 self .datastore = EpuAcquisitionSessionStore (str (self .watch_dir ), dry_run , api_url )
106+ logging .debug (f"Instantiated new datastore, " + ("in-memory only" if self .datastore .in_memory_only else "data will be permanently saved on the backend" ))
107+ if self .datastore .in_memory_only :
108+ logging .info (f"Acquisition session uuid assigned: { self .datastore .uuid } " )
100109
110+ # TODO Enhancement: log all events to graylog (if reachable) for session debugging and playback
101111 def on_any_event (self , event ):
102- if self .watch_dir is None :
103- raise RuntimeError ("watch_dir not initialized - call set_watch_dir() first" )
104- if self .datastore is None :
105- raise RuntimeError ("datastore not initialized - call init_datastore() first" )
106-
107- # Enhancement: record all events to graylog (if reachable) for session debugging and playback
108-
109112 if event .is_directory or not self .matches_pattern (event .src_path ):
110113 if event .is_directory :
111- logging .info (f"Skipping non-matching path: { event .src_path } " )
114+ logging .debug (f"Skipping non-matching path: { event .src_path } " )
112115 return
113116
114117 if event .event_type not in self .watched_event_types :
115118 if event .is_directory :
116- logging .info (f"Skipping non-matching event type: { event .event_type } " )
119+ logging .debug (f"Skipping non-matching event type: { event .event_type } " )
117120 return
118121
119122 current_time = time .time ()
@@ -135,18 +138,15 @@ def on_any_event(self, event):
135138 if new_file_detected and re .search (EpuParser .session_dm_pattern , event .src_path ):
136139 assert self .datastore .get_grid_by_path (event .src_path ) is None # guaranteed because is a new file
137140 grid = Grid (str (Path (event .src_path ).parent .resolve ()))
138- session_data = EpuParser .parse_epu_session_manifest (
139- str (Path (event .src_path ).resolve ())
140- ) # just to get the name really, techdebt
141- self .datastore .grids .add (session_data .name , grid )
141+ self .datastore .add_grid (grid )
142142
143143 # try to work out which grid the touched file relates to
144144 grid_id = self .datastore .get_grid_by_path (event .src_path )
145145 if grid_id is None :
146146 # This must be an orphaned file since it matched one of patterns for files we are interested in,
147147 # but a containing grid doesn't exist yet - store it for when we have the grid.
148148 if self .verbose :
149- logging .info (
149+ logging .debug (
150150 f"Could not determine which grid this data belongs to: { event .src_path } , adding to orphans"
151151 )
152152 self .orphaned_files [event .src_path ] = (event , current_time , file_stat )
@@ -169,27 +169,27 @@ def on_any_event(self, event):
169169 self ._on_micrograph_detected (path , grid_id , new_file_detected )
170170
171171 def _on_session_detected (self , path : str , grid_id , is_new_file : bool = True ):
172- logging .info (f"Session manifest { 'detected' if is_new_file else 'updated' } : { path } " )
172+ logging .debug (f"Session manifest { 'detected' if is_new_file else 'updated' } : { path } " )
173173 session_data = EpuParser .parse_epu_session_manifest (path )
174174 gridstore = self .datastore .grids .get (grid_id )
175175
176176 if gridstore and session_data != gridstore .session_data :
177177 # Create the acquisition first if we're not in dry run mode
178178 if not self .datastore .in_memory_only and self .datastore .api_client :
179- success = self .datastore .api_client .create ("acquisition" , session_data . id , session_data )
179+ success = self .datastore .api_client .create ("acquisition" , self . datastore . uuid , session_data )
180180 if success :
181- logging .info (f"Created acquisition for session { session_data .name } " )
181+ logging .info (f"Created acquisition for session # { self . datastore . uuid } { session_data .name } " )
182182
183183 gridstore .session_data = session_data
184- logging .info (f"Updated session data for grid { grid_id } " )
184+ logging .debug (f"Updated session data for grid { grid_id } " )
185185 logging .info (gridstore .session_data )
186186
187187 def _process_orphaned_files (self , grid_id ):
188188 """Process any orphaned files that belong to this grid"""
189189 for path , (event , timestamp , file_stat ) in self .orphaned_files .items ():
190190 # Check if this orphaned file belongs to the new grid
191191 if self .datastore .get_grid_by_path (path ) == grid_id :
192- logging .info (f"Processing previously orphaned file: { path } " )
192+ logging .debug (f"Processing previously orphaned file: { path } " )
193193 self .on_any_event (event ) # Process the file as if we just received the event
194194
195195 # Create a new dictionary excluding the processed files
@@ -198,15 +198,15 @@ def _process_orphaned_files(self, grid_id):
198198 }
199199
200200 def _on_atlas_detected (self , path : str , grid_id , is_new_file : bool = True ):
201- logging .info (f"Atlas { 'detected' if is_new_file else 'updated' } : { path } " )
201+ logging .debug (f"Atlas { 'detected' if is_new_file else 'updated' } : { path } " )
202202 gridstore = self .datastore .grids .get (grid_id )
203203 atlas_data = EpuParser .parse_atlas_manifest (path )
204204 if atlas_data != gridstore .atlas_data :
205205 gridstore .atlas_data = atlas_data
206206 logging .info (gridstore .atlas_data )
207207
208208 def _on_gridsquare_metadata_detected (self , path : str , grid_id , is_new_file : bool = True ):
209- logging .info (f"Gridsquare metadata { 'detected' if is_new_file else 'updated' } : { path } " )
209+ logging .debug (f"Gridsquare metadata { 'detected' if is_new_file else 'updated' } : { path } " )
210210
211211 gridsquare_id = EpuParser .gridsquare_dm_file_pattern .search (path ).group (1 )
212212 assert gridsquare_id is not None , f"gridsquare_id should not be None: { gridsquare_id } "
0 commit comments