
Commit 7443cb7

feat: add continuous monitoring
1 parent 5e18991 commit 7443cb7

1 file changed: +249 −10

extensions/business/cybersec/red_mesh/pentester_api_01.py
@@ -35,7 +35,7 @@
 from naeural_core.business.default.web_app.fast_api_web_app import FastApiWebAppPlugin as BasePlugin
 from .redmesh_utils import PentestLocalWorker # Import PentestJob from separate module

-__VER__ = '0.8.1' # updated version
+__VER__ = '0.8.2'

 _CONFIG = {
   **BasePlugin.CONFIG,
@@ -55,6 +55,11 @@
   "PORT_ORDER": "SHUFFLE", # "SHUFFLE" or "SEQUENTIAL"
   "EXCLUDED_FEATURES": [],

+  # Run mode: SINGLEPASS (default) or CONTINUOUS_MONITORING
+  "RUN_MODE": "SINGLEPASS",
+  "MONITOR_INTERVAL": 60, # seconds between passes in continuous mode
+  "MONITOR_JITTER": 5, # random jitter to avoid simultaneous CStore writes
+
   'VALIDATION_RULES': {
     **BasePlugin.CONFIG['VALIDATION_RULES'],
   },
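Note: the three new keys default to single-pass behavior, so continuous monitoring is opt-in per instance. As a minimal sketch (the INSTANCE_ID value and variable name below are illustrative assumptions, not part of this diff), an instance config enabling the new mode might look like:

# Hypothetical instance config enabling the new mode; only RUN_MODE,
# MONITOR_INTERVAL and MONITOR_JITTER are introduced by this commit.
PENTESTER_INSTANCE_CONFIG = {
  "INSTANCE_ID": "red_mesh_01",          # placeholder identifier
  "RUN_MODE": "CONTINUOUS_MONITORING",   # overrides the SINGLEPASS default
  "MONITOR_INTERVAL": 300,               # rescan every 5 minutes
  "MONITOR_JITTER": 5,                   # desynchronize peer CStore writes
}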
@@ -438,9 +443,20 @@ def _maybe_launch_jobs(self, nr_local_workers=None):
       current_worker_finished = worker_entry.get("finished", False)
       if current_worker_finished:
         continue
-      # If job not already running and not completed, start a new thread
+
+      # Check if this is a continuous monitoring job where our worker was reset
+      # (launcher reset our finished flag for next pass) - clear local tracking
       closed_target = job_id in self.completed_jobs_reports
-      in_progress_target = job_id in self.scan_jobs
+      if closed_target and not current_worker_finished:
+        # Our worker entry was reset by launcher for next pass - clear local state
+        self.P(f"Detected worker reset for job {job_id}, clearing local tracking for next pass")
+        self.completed_jobs_reports.pop(job_id, None)
+        if job_id in self.lst_completed_jobs:
+          self.lst_completed_jobs.remove(job_id)
+        closed_target = False
+
+      # If job not already running and not completed, start a new thread
+      in_progress_target = job_id in self.scan_jobs
       if not in_progress_target and not closed_target:
         launcher = job_specs.get("launcher")
         launcher_alias = job_specs.get("launcher_alias")
@@ -552,6 +568,9 @@ def _close_job(self, job_id, canceled=False):
     """
     Close a local job, aggregate reports, and persist in CStore.

+    Reports are saved to R1FS (IPFS) and only the CID is stored in CStore
+    to avoid bloating the distributed state.
+
     Parameters
     ----------
     job_id : str
@@ -579,9 +598,32 @@ def _close_job(self, job_id, canceled=False):
     closing = "Forced" if canceled else "Post finish"
     worker_entry = job_specs.setdefault("workers", {}).setdefault(self.ee_addr, {})
     worker_entry["finished"] = True
-    worker_entry["result"] = report
     worker_entry["canceled"] = canceled
-    job_specs["workers"][self.ee_addr] = worker_entry
+
+    # Save full report to R1FS and store only CID in CStore
+    try:
+      report_cid = self.r1fs.add_json(report, show_logs=False)
+      if report_cid:
+        worker_entry["report_cid"] = report_cid
+        worker_entry["result"] = None # No blob in CStore
+        self.P(f"Report saved to R1FS with CID: {report_cid}")
+      else:
+        # Fallback: store report directly if R1FS fails
+        self.P("R1FS add_json returned None, storing report directly in CStore", color='y')
+        worker_entry["report_cid"] = None
+        worker_entry["result"] = report
+    except Exception as e:
+      # Fallback: store report directly if R1FS fails
+      self.P(f"Failed to save report to R1FS: {e}. Storing directly in CStore", color='r')
+      worker_entry["report_cid"] = None
+      worker_entry["result"] = report
+
+    # Re-read job_specs to avoid overwriting concurrent updates (e.g., pass_history)
+    fresh_job_specs = self.chainstore_hget(hkey=self.cfg_instance_id, key=job_id)
+    if fresh_job_specs and isinstance(fresh_job_specs, dict):
+      fresh_job_specs["workers"][self.ee_addr] = worker_entry
+      job_specs = fresh_job_specs
+
     self.P("{} closing job_id {}:\n{}".format(
       closing,
       job_id,
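A note on the read side of this change: consumers of a worker entry now have to handle both shapes, the report_cid pointer and the inline result fallback. A minimal sketch of that lookup (the helper name is hypothetical), assuming an r1fs handle with the same get_json API used in the diff:

def resolve_worker_report(worker_entry, r1fs):
  """Return the full report for a worker entry, wherever it lives.

  Mirrors the writer above: prefer the R1FS CID, fall back to the inline
  'result' blob that _close_job stores when R1FS is unavailable.
  """
  report_cid = worker_entry.get("report_cid")
  if report_cid:
    # Full report lives in R1FS; CStore only holds the pointer
    return r1fs.get_json(report_cid)
  # Fallback path: the report was stored inline in CStore
  return worker_entry.get("result")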
@@ -641,6 +683,104 @@ def _maybe_close_jobs(self):
     return


+  def _maybe_schedule_next_pass(self):
+    """
+    Launcher orchestrates continuous monitoring passes.
+
+    For CONTINUOUS_MONITORING jobs, this method:
+    1. Detects when all workers have finished the current pass
+    2. Records pass completion in history and schedules the next pass
+    3. Resets all workers when it's time to start the next pass
+
+    Only the launcher node executes scheduling logic.
+
+    Returns
+    -------
+    None
+    """
+    all_jobs = self.chainstore_hgetall(hkey=self.cfg_instance_id)
+
+    for job_key, job_specs in all_jobs.items():
+      normalized_key, job_specs = self._normalize_job_record(job_key, job_specs)
+      if normalized_key is None:
+        continue
+
+      # Only handle continuous monitoring jobs that are still running
+      run_mode = job_specs.get("run_mode", "SINGLEPASS")
+      if run_mode != "CONTINUOUS_MONITORING":
+        continue
+      monitoring_status = job_specs.get("monitoring_status", "RUNNING")
+      if monitoring_status == "STOPPED":
+        continue
+
+      # Only launcher manages scheduling
+      is_launcher = job_specs.get("launcher") == self.ee_addr
+      if not is_launcher:
+        continue
+
+      workers = job_specs.get("workers", {})
+      if not workers:
+        continue
+
+      all_finished = all(w.get("finished") for w in workers.values())
+      next_pass_at = job_specs.get("next_pass_at")
+      job_pass = job_specs.get("job_pass", 1)
+      job_id = job_specs.get("job_id")
+
+      if all_finished and next_pass_at is None:
+        # ═══════════════════════════════════════════════════
+        # STATE: All peers completed current pass
+        # ═══════════════════════════════════════════════════
+        pass_history = job_specs.setdefault("pass_history", [])
+        pass_history.append({
+          "pass_nr": job_pass,
+          "completed_at": self.time(),
+          "reports": {addr: w.get("report_cid") for addr, w in workers.items()}
+        })
+
+        # Check if soft stop was scheduled
+        if monitoring_status == "SCHEDULED_FOR_STOP":
+          job_specs["monitoring_status"] = "STOPPED"
+          self.P(f"[CONTINUOUS] Pass {job_pass} complete for job {job_id}. Stopping (soft stop was scheduled)")
+          self.chainstore_hset(hkey=self.cfg_instance_id, key=job_key, value=job_specs)
+          continue
+
+        # Schedule next pass
+        interval = job_specs.get("monitor_interval", self.cfg_monitor_interval)
+        jitter = random.uniform(0, self.cfg_monitor_jitter)
+        job_specs["next_pass_at"] = self.time() + interval + jitter
+
+        self.P(f"[CONTINUOUS] Pass {job_pass} complete for job {job_id}. Next pass in {interval}s (+{jitter:.1f}s jitter)")
+        self.chainstore_hset(hkey=self.cfg_instance_id, key=job_key, value=job_specs)
+
+        # Clear from completed_jobs_reports to allow relaunch
+        self.completed_jobs_reports.pop(job_id, None)
+        if job_id in self.lst_completed_jobs:
+          self.lst_completed_jobs.remove(job_id)
+
+      elif all_finished and next_pass_at and self.time() >= next_pass_at:
+        # ═══════════════════════════════════════════════════
+        # STATE: Interval elapsed, start next pass
+        # ═══════════════════════════════════════════════════
+        job_specs["job_pass"] = job_pass + 1
+        job_specs["next_pass_at"] = None
+
+        for addr in workers:
+          workers[addr]["finished"] = False
+          workers[addr]["result"] = None
+          workers[addr]["report_cid"] = None
+
+        self.P(f"[CONTINUOUS] Starting pass {job_pass + 1} for job {job_id}", boxed=True)
+        self.chainstore_hset(hkey=self.cfg_instance_id, key=job_key, value=job_specs)
+
+        # Clear local tracking to allow relaunch
+        self.completed_jobs_reports.pop(job_id, None)
+        if job_id in self.lst_completed_jobs:
+          self.lst_completed_jobs.remove(job_id)
+    #end for each job
+    return
+
+
   def _get_all_network_jobs(self):
     """
     Retrieve all jobs tracked in CStore for this instance.
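The scheduler above is effectively a two-state machine keyed on (all_finished, next_pass_at): record-and-schedule, then reset-and-relaunch. A self-contained simulation of just that state logic (plain dicts, no CStore or plugin base class; jitter, stop handling, and report CIDs omitted) may help in following the pass lifecycle:

import time

def tick(job, now=None):
  """One scheduler tick over a simulated job record (launcher side).

  State A: all workers finished, no deadline set -> record pass, set deadline.
  State B: deadline elapsed -> bump job_pass, clear deadline, reset workers.
  """
  now = now if now is not None else time.time()
  workers = job["workers"]
  all_finished = all(w.get("finished") for w in workers.values())
  if all_finished and job.get("next_pass_at") is None:
    job.setdefault("pass_history", []).append({"pass_nr": job["job_pass"], "completed_at": now})
    job["next_pass_at"] = now + job["monitor_interval"]
  elif all_finished and job.get("next_pass_at") and now >= job["next_pass_at"]:
    job["job_pass"] += 1
    job["next_pass_at"] = None
    for w in workers.values():
      w["finished"] = False

# Simulated single-worker job with a 60s interval
job = {"job_pass": 1, "next_pass_at": None, "monitor_interval": 60,
       "workers": {"node_a": {"finished": True}}}
tick(job, now=1000.0)  # state A: schedules the next pass at t=1060
tick(job, now=1060.0)  # state B: starts pass 2, resets node_a
assert job["job_pass"] == 2 and not job["workers"]["node_a"]["finished"]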
@@ -759,18 +899,20 @@ def list_features(self):

   @BasePlugin.endpoint(method="post")
   def launch_test(
-      self,
-      target: str = "",
-      start_port: int = 1, end_port: int = 65535,
+    self,
+    target: str = "",
+    start_port: int = 1, end_port: int = 65535,
     exceptions: str = "64297",
     distribution_strategy: str = "",
     port_order: str = "",
     excluded_features: list[str] = None,
+    run_mode: str = "",
+    monitor_interval: int = 0,
   ):
     """
     Start a pentest on the specified target.

-    Announces the job to the network via CStore; actual executtarget:ion is handled
+    Announces the job to the network via CStore; actual execution is handled
     asynchronously by worker threads.

     Parameters
@@ -790,6 +932,11 @@ def launch_test(
       "SHUFFLE" to randomize port order; "SEQUENTIAL" for ordered scan.
     excluded_features: list[str], optional
       List of feature names to exclude from scanning.
+    run_mode: str, optional
+      "SINGLEPASS" (default) for one-time scan; "CONTINUOUS_MONITORING" for
+      repeated scans at monitor_interval.
+    monitor_interval: int, optional
+      Seconds between passes in CONTINUOUS_MONITORING mode (0 = use config).

     Returns
     -------
@@ -842,6 +989,13 @@ def launch_test(
     if not port_order or port_order not in ["SHUFFLE", "SEQUENTIAL"]:
       port_order = self.cfg_port_order

+    # Validate run_mode and monitor_interval
+    run_mode = str(run_mode).upper()
+    if not run_mode or run_mode not in ["SINGLEPASS", "CONTINUOUS_MONITORING"]:
+      run_mode = self.cfg_run_mode
+    if monitor_interval <= 0:
+      monitor_interval = self.cfg_monitor_interval
+
     chainstore_peers = self.cfg_chainstore_peers
     num_workers = len(chainstore_peers)

@@ -901,6 +1055,13 @@ def launch_test(
       "port_order": port_order,
       "excluded_features": excluded_features,
       "enabled_features": enabled_features,
+      # Continuous monitoring fields
+      "run_mode": run_mode,
+      "monitor_interval": monitor_interval,
+      "monitoring_status": "RUNNING", # RUNNING | SCHEDULED_FOR_STOP | STOPPED
+      "job_pass": 1,
+      "next_pass_at": None,
+      "pass_history": [],
     }
     self.chainstore_hset(
       hkey=self.cfg_instance_id,
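Assuming the plugin's FastAPI app is reachable over HTTP (host, port, and target below are placeholders, and the job_id echo in the response is an assumption, not shown in this diff), launching a continuous job could look like:

import requests  # third-party HTTP client, assumed available

resp = requests.post(
  "http://localhost:8080/launch_test",    # placeholder deployment URL
  json={
    "target": "10.0.0.5",                 # illustrative target
    "start_port": 1,
    "end_port": 1024,
    "run_mode": "CONTINUOUS_MONITORING",  # new field from this commit
    "monitor_interval": 120,              # rescan every 2 minutes
  },
)
job_id = resp.json().get("job_id")        # assumes the response echoes a job_id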
@@ -1017,6 +1178,82 @@ def stop_and_delete_job(self, job_id : str):
     return {"status": "success", "job_id": job_id}


+  @BasePlugin.endpoint
+  def get_report(self, cid: str):
+    """
+    Retrieve a full report from R1FS by CID.
+
+    Parameters
+    ----------
+    cid : str
+      Content identifier of the report stored in R1FS.
+
+    Returns
+    -------
+    dict
+      The full report data or error message.
+    """
+    if not cid:
+      return {"error": "No CID provided"}
+    try:
+      report = self.r1fs.get_json(cid)
+      if report is None:
+        return {"error": "Report not found", "cid": cid}
+      return {"cid": cid, "report": report}
+    except Exception as e:
+      self.P(f"Failed to retrieve report from R1FS: {e}", color='r')
+      return {"error": str(e), "cid": cid}
+
+
+  @BasePlugin.endpoint(method="post")
+  def stop_monitoring(self, job_id: str, stop_type: str = "SOFT"):
+    """
+    Stop continuous monitoring for a job.
+
+    Parameters
+    ----------
+    job_id : str
+      Identifier of the job to stop monitoring.
+    stop_type : str, optional
+      "SOFT" (default): Let current pass complete, then stop.
+      Sets monitoring_status="SCHEDULED_FOR_STOP".
+      "HARD": Stop immediately. Sets monitoring_status="STOPPED".
+
+    Returns
+    -------
+    dict
+      Status including job_id and passes completed.
+    """
+    raw_job_specs = self.chainstore_hget(hkey=self.cfg_instance_id, key=job_id)
+    if not raw_job_specs:
+      return {"error": "Job not found", "job_id": job_id}
+
+    _, job_specs = self._normalize_job_record(job_id, raw_job_specs)
+    if job_specs.get("run_mode") != "CONTINUOUS_MONITORING":
+      return {"error": "Job is not in CONTINUOUS_MONITORING mode", "job_id": job_id}
+
+    stop_type = str(stop_type).upper()
+    passes_completed = job_specs.get("job_pass", 1)
+
+    if stop_type == "HARD":
+      job_specs["monitoring_status"] = "STOPPED"
+      self.P(f"[CONTINUOUS] Hard stop for job {job_id} after {passes_completed} passes")
+    else:
+      # SOFT stop - let current pass complete
+      job_specs["monitoring_status"] = "SCHEDULED_FOR_STOP"
+      self.P(f"[CONTINUOUS] Soft stop scheduled for job {job_id} (will stop after current pass)")
+
+    self.chainstore_hset(hkey=self.cfg_instance_id, key=job_id, value=job_specs)
+
+    return {
+      "monitoring_status": job_specs["monitoring_status"],
+      "stop_type": stop_type,
+      "job_id": job_id,
+      "passes_completed": passes_completed,
+      "pass_history": job_specs.get("pass_history", []),
+    }
+
+
   def process(self):
     """
     Periodic task handler: launch new jobs and close completed ones.
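Usage sketch for the two new endpoints, with the same placeholder base URL as the launch example above; treating the bare @BasePlugin.endpoint decorator as exposing get_report via GET with query parameters is an assumption:

import requests  # third-party HTTP client, assumed available

BASE = "http://localhost:8080"  # placeholder deployment URL

# Fetch one pass's full report by a CID taken from pass_history
report = requests.get(f"{BASE}/get_report", params={"cid": "<report_cid>"}).json()

# Soft stop: the current pass completes, then the launcher flips the job to STOPPED
out = requests.post(
  f"{BASE}/stop_monitoring",
  json={"job_id": "<job_id>", "stop_type": "SOFT"},  # placeholders
).json()
print(out["monitoring_status"], out["passes_completed"])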
@@ -1032,9 +1269,11 @@ def process(self):
       return
     elif not self.__warmup_done:
       self.__post_init()
-      #endif
+    #endif
     # Launch any new jobs
     self._maybe_launch_jobs()
     # Check active jobs for completion
     self._maybe_close_jobs()
+    # Handle continuous monitoring scheduling (launcher only)
+    self._maybe_schedule_next_pass()
     return
