import traceback
import asyncio
import urllib.parse
+import logging

from wpull.application.hook import Actions
from wpull.application.plugin import WpullPlugin, PluginFunctions, hook, event
@@ -146,13 +147,15 @@ def activate(self):
		self.loop = asyncio.get_event_loop()
		self.enable_stdio_capture()
		self.add_signal_handlers()
+		self.logger = logging.getLogger("grab_site.wpull_plugin")
		self.init_job_data()
		self.init_ws()
		self.setup_watchers()
		self.all_start_urls = open(cf("all_start_urls")).read().rstrip("\n").split("\n")
		self.all_start_netlocs = set(urllib.parse.urlparse(url).netloc for url in self.all_start_urls)
		self.skipped_videos = open(cf("skipped_videos"), "w", encoding="utf-8")
		self.skipped_max_content_length = open(cf("skipped_max_content_length"), "w", encoding="utf-8")
+		self.compiled_ignores = []
		self.update_ignores()
		super().activate()

@@ -255,6 +258,7 @@ def update_max_content_length(self):
			return
		with open(self.watchers["max_content_length"].fname, "r") as f:
			self.job_data["max_content_length"] = int(f.read().strip())
+		self.logger.info(f"Settings change: max_content_length = {self.job_data['max_content_length']}")

	@swallow_exception
	def update_delay(self):
@@ -266,6 +270,8 @@ def update_delay(self):
			self.job_data["delay_min"], self.job_data["delay_max"] = list(int(s) for s in content.split("-", 1))
		else:
			self.job_data["delay_min"] = self.job_data["delay_max"] = int(content)
+		max_string = f"-{self.job_data['delay_max']}" if self.job_data["delay_min"] != self.job_data["delay_max"] else ""
+		self.logger.info(f"Settings change: delay = {self.job_data['delay_min']}{max_string}")

	@swallow_exception
	def update_concurrency(self):
@@ -278,6 +284,7 @@ def update_concurrency(self):
			concurrency = 1
		self.job_data["concurrency"] = concurrency
		self.app_session.factory["PipelineSeries"].concurrency = concurrency
+		self.logger.info(f"Settings change: concurrency = {concurrency}")

	stop_path = cf("stop")
	def should_stop(self):
@@ -298,6 +305,9 @@ def update_video(self):
	@swallow_exception
	def update_scrape(self):
		scrape = path_exists_with_cache(self.scrape_path)
+		if scrape == self.job_data["scrape"]:
+			return
+		self.logger.info(f"Settings change: scrape = {scrape}")
		self.job_data["scrape"] = scrape
		if not scrape:
			# Empty the list of scrapers, which will stop scraping for new URLs
@@ -329,6 +339,15 @@ def update_ignores(self):
		for ig in sorted(ignores):
			self.print_to_terminal(f"\t{ig}")

+		# Log changes
+		old_ignores = set(x[0] for x in self.compiled_ignores)
+		added_ignores = ignores - old_ignores
+		removed_ignores = old_ignores - ignores
+		for ig in added_ignores:
+			self.logger.info(f"Adding ignore: {ig}")
+		for ig in removed_ignores:
+			self.logger.info(f"Removing ignore: {ig}")
+
		self.compiled_ignores = [(ig, re_compile(ig)) for ig in ignores]
		self.combined_ignore_regexp = compile_combined_regexp(ignores)

@@ -364,6 +383,7 @@ def accept_url(self, item_session: ItemSession, verdict: bool, reasons: dict):

		should_ignore = self.should_ignore_url(url, record_info)
		if should_ignore:
+			self.logger.info(f"Ignoring ‘{url}’")
			if not self.job_data["suppress_ignore_reports"]:
				pattern = self.get_specific_ignore_pattern(url)
				self.maybe_log_ignore(url, pattern)