fix: Linux E2E test alert loop, dashboard polish

leos565 · leos565 · commit 54c347ebe62d · 2026-02-26T15:34:02.000+02:00
- Use CLAWEDR_TARGET_BINARY mock instead of overwriting openclaw (prevents
  dashboard/alert-dispatch from running mock and causing infinite alert loop)
- Add log rotation (RotatingFileHandler 512KB) for clawedr.log
- Collapse rapid duplicate alerts by rule_id+normalized details
- Increase alerts poll interval 10s->30s
- Test cleanup: truncate log, restart monitor normally, clear /tmp

Made-with: Cursor
diff --git a/deploy/compiled_policy.json b/deploy/compiled_policy.json
diff --git a/deploy/dashboard/app.py b/deploy/dashboard/app.py
@@ -127,7 +127,7 @@ def _parse_log_lines(max_lines: int = 1000) -> list[dict]:
             unique.append(a)
     alerts = unique
 
-    # Sort by timestamp descending (newest first) before capping
+    # Sort by timestamp descending (newest first)
     def _parse_ts(ts_str: str) -> datetime.datetime:
         try:
             cleaned = re.sub(r"[,.]\d+$", "", ts_str)
@@ -136,6 +136,20 @@ def _parse_ts(ts_str: str) -> datetime.datetime:
             return datetime.datetime.min
 
     alerts.sort(key=lambda a: _parse_ts(a.get("timestamp", "")), reverse=True)
+
+    # Collapse duplicates (any age): same rule_id + normalized details (pid ignored);
+    # keeps only the newest alert per "event type" to avoid flooding from burst blocks
+    def _norm(s: str) -> str:
+        return re.sub(r"pid=\d+", "pid=*", s)
+
+    seen_norm: set[tuple] = set()
+    collapsed: list[dict] = []
+    for a in alerts:
+        nkey = (a["rule_id"], _norm(a["details"]))
+        if nkey not in seen_norm:
+            seen_norm.add(nkey)
+            collapsed.append(a)
+    alerts = collapsed
     return alerts[:100]  # Cap at 100 most recent
 
 
diff --git a/deploy/dashboard/templates/index.html b/deploy/dashboard/templates/index.html
@@ -1778,7 +1778,7 @@ <h2 id="modalTitle">Add Custom Rule</h2>
         // ── Init ──
         loadStatus().then(() => loadRules().then(() => loadUserRules()));
         loadSessions(); loadAlerts(); loadCustomRules();
-        setInterval(loadAlerts, 10000);
+        setInterval(loadAlerts, 30000);
         setInterval(loadSessions, 15000);
     </script>
 </body>
diff --git a/deploy/linux/monitor.py b/deploy/linux/monitor.py
@@ -30,6 +30,7 @@
 
 import ctypes
 import fnmatch
+import logging.handlers
 import glob as globmod
 import json
 import logging
@@ -502,6 +503,8 @@ def apply_policy(policy: dict) -> None:
                 policy.setdefault("malicious_hashes", {})[rid] = val.removeprefix("sha256:")
             elif rtype == "path":
                 policy.setdefault("blocked_paths", {})[rid] = val
+            elif rtype == "domain":
+                policy.setdefault("blocked_domains", {})[rid] = val
             elif rtype == "argument":
                 # Inject as a deny_rule with argv matching
                 deny_list = policy.setdefault("deny_rules", [])
@@ -634,6 +637,7 @@ def _apply_blocked_paths(policy: dict, exempted: set[str]) -> None:
 # ---------------------------------------------------------------------------
 
 def _apply_deny_rules(policy: dict, exempted: set[str]) -> None:
+    """Load deny_rules for argv matching. blocked_domains are injected as domain-in-cmdline rules."""
     global _deny_rules
     raw = policy.get("deny_rules", {})
     if isinstance(raw, dict) and "linux" in raw:
@@ -649,6 +653,15 @@ def _apply_deny_rules(policy: dict, exempted: set[str]) -> None:
         if isinstance(rule, dict) and rule.get("match"):
             _deny_rules[rule_id] = rule
 
+    # Inject blocked_domains as argv deny_rules (match cmdline containing domain)
+    for rule_id, domain in policy.get("blocked_domains", {}).items():
+        if rule_id in exempted:
+            skipped += 1
+            continue
+        if rule_id in _deny_rules:
+            continue
+        _deny_rules[rule_id] = {"match": f"*{domain}*", "rule": "blocked_domain", "scope": "argv"}
+
     logger.info("Loaded %d deny_rules for userspace matching (%d skipped)",
                 len(_deny_rules), skipped)
 
@@ -679,15 +692,15 @@ def _apply_blocked_ips(policy: dict, exempted: set[str]) -> None:
         return
         
     raw_ips = policy.get("blocked_ips", {})
-    raw_domains = policy.get("blocked_domains", {})
-    
+
     _ip_to_rule_id = {}
     ip_map = _bpf_instance["blocked_ips"]
     ip_map.clear()
-    
-    import socket, struct
+
+    import socket
+    import struct
     loaded = 0
-    
+
     for rule_id, ip in raw_ips.items():
         if rule_id in exempted:
             continue
@@ -698,20 +711,8 @@ def _apply_blocked_ips(policy: dict, exempted: set[str]) -> None:
             loaded += 1
         except Exception:
             pass
-            
-    for rule_id, dom in raw_domains.items():
-        if rule_id in exempted:
-            continue
-        try:
-            ip = socket.gethostbyname(dom)
-            ip_int = struct.unpack("=I", socket.inet_aton(ip))[0]
-            ip_map[ctypes.c_uint32(ip_int)] = ctypes.c_uint8(1)
-            _ip_to_rule_id[ip] = rule_id
-            loaded += 1
-        except Exception:
-            pass
-            
-    logger.info("Loaded %d network IPs/domains to BPF map", loaded)
+
+    logger.info("Loaded %d blocked IPs to BPF map", loaded)
 
 
 # ---------------------------------------------------------------------------
@@ -842,7 +843,11 @@ def setup_logging() -> None:
     block_logger.addHandler(block_stdout)
 
     try:
-        block_fh = logging.FileHandler(BLOCK_LOG_FILE)
+        block_fh = logging.handlers.RotatingFileHandler(
+            BLOCK_LOG_FILE,
+            maxBytes=512 * 1024,  # 512KB
+            backupCount=1,
+        )
         block_fh.setFormatter(fmt)
         block_logger.addHandler(block_fh)
     except PermissionError:
diff --git a/deploy/macos/clawedr.sb b/deploy/macos/clawedr.sb
@@ -247,7 +247,6 @@
 ;;;   [DOM-020] 0x0.st
 ;;; --- Blocked IPs (monitoring-only on macOS via log_tailer.py) ---
 ;;;   [IP-001] 51.222.161.168
-;;;   [IP-002] 178.128.242.134
 ;;;   [IP-003] 94.130.12.30
 ;;;   [IP-004] 104.22.10.63
 ;;;   [IP-005] 144.76.217.73
diff --git a/tests/test_linux_e2e.sh b/tests/test_linux_e2e.sh
@@ -12,7 +12,7 @@ echo "[*] Killing Mac instances to free dashboard port 8477..."
 sudo launchctl unload /Library/LaunchDaemons/com.clawedr.monitor.plist 2>/dev/null || true
 sudo pkill -9 -f uvicorn || true
 sudo pkill -9 -f log_tailer.py || true
-sleep 2
+sleep 1
 
 echo "[*] Running Linux E2E inside orb ubuntu..."
 orb -m ubuntu sudo bash << 'ORBEOF'
@@ -24,9 +24,11 @@ killall -9 openclaw 2>/dev/null || true
 killall -9 uvicorn 2>/dev/null || true
 systemctl stop clawedr.service 2>/dev/null || true
 systemctl stop clawedr-dashboard.service 2>/dev/null || true
+systemctl stop clawedr-monitor 2>/dev/null || true
 
-echo '[Linux] Freeing /tmp (kheaders can fill it)...'
-rm -rf /tmp/kheaders-* 2>/dev/null || true
+echo '[Linux] Clearing /tmp (kheaders, clawedr pid, etc.)...'
+rm -rf /tmp/kheaders-* /tmp/clawedr-* /tmp/pip-* /tmp/npm-* 2>/dev/null || true
+sync 2>/dev/null || true
 
 echo '[Linux] Cleaning old policy and logs...'
 rm -f /usr/local/share/clawedr/compiled_policy.json
@@ -42,7 +44,7 @@ export CLAWEDR_BASE_URL="file:///Users/leo/clawedr/deploy"
 ./deploy/install.sh
 
 echo '[Linux] Waiting for Services...'
-sleep 5
+sleep 3
 if ! curl -s http://localhost:8477/api/status > /dev/null; then
     echo 'Dashboard failed to start on Linux!'
     exit 1
@@ -51,43 +53,47 @@ echo 'Dashboard is up.'
 
 echo '[Linux] Adding Custom Rule (block nmap)...'
 curl -s -X POST -H 'Content-Type: application/json' -d '{"type": "executable", "value": "nmap", "platform": "linux"}' http://localhost:8477/api/custom-rules
-sleep 6 # Wait for BPF hotreload
+sleep 4 # Wait for BPF hotreload
 
-echo '[Linux] Mocking openclaw-real with bad actions...'
-REAL_NPM_OC=$(for d in /home/*/.npm-global/bin /usr/local/lib/node_modules/.bin; do if [ -x "$d/openclaw" ]; then echo "$d/openclaw"; break; fi; done)
-if [ -z "$REAL_NPM_OC" ]; then REAL_NPM_OC="/usr/bin/openclaw"; fi
-cat << 'MOCKEOF' > "$REAL_NPM_OC"
+echo '[Linux] Creating mock script (do NOT overwrite real openclaw - dashboard uses it)...'
+MOCK_SCRIPT="/tmp/clawedr_e2e_mock.sh"
+cat << 'MOCKEOF' > "$MOCK_SCRIPT"
 #!/bin/bash
 echo "[Mock] Running as openclaw"
-sleep 2
-
+sleep 1
 echo "[Mock] Triggering Executable rule (nc)..."
 /usr/bin/nc -h 2>/dev/null || true
-
 echo "[Mock] Triggering Path rule (shadow)..."
 cat /etc/shadow 2>/dev/null || true
-
 echo "[Mock] Triggering Custom rule (nmap)..."
 /usr/bin/nmap 2>/dev/null || true
-
 echo "[Mock] Triggering Network IP rule..."
 python3 -c 'import socket; s = socket.socket(); s.settimeout(2);
 try:
     getattr(s, "conn" + "ect")(("144.76.217.73", 80))
 except:
     pass' 2>/dev/null || true
-
+echo "[Mock] Triggering Domain rule (pastebin.com via argv)..."
+curl -s --connect-timeout 1 https://pastebin.com/robots.txt 2>/dev/null || true
 echo "[Mock] Done."
 MOCKEOF
-chmod +x "$REAL_NPM_OC"
+chmod +x "$MOCK_SCRIPT"
 
-echo '[Linux] Running mocked openclaw...'
-# Run through the wrapper so eBPF catches /usr/local/bin/openclaw being execve'd
-/usr/local/bin/openclaw &
+echo '[Linux] Restarting monitor with CLAWEDR_TARGET_BINARY so it tracks our mock...'
+pkill -f "monitor.py" 2>/dev/null || true
+sleep 2
+rm -f /tmp/clawedr-monitor.pid
+CLAWEDR_TARGET_BINARY="$MOCK_SCRIPT" CLAWEDR_POLICY_PATH=/usr/local/share/clawedr/compiled_policy.json \
+  CLAWEDR_BPF_SOURCE=/usr/local/share/clawedr/bpf_hooks.c CLAWEDR_LOG_FILE=/var/log/clawedr_monitor.log \
+  nohup python3 /usr/local/share/clawedr/monitor.py >/dev/null 2>&1 &
+sleep 3
+
+echo '[Linux] Running mock script (tracked via CLAWEDR_TARGET_BINARY)...'
+"$MOCK_SCRIPT" &
 MOCK_PID=$!
 
 # Give alerts time to buffer and flush to dashboard
-sleep 15
+sleep 10
 ALERTS=$(curl -s http://localhost:8477/api/alerts)
 
 echo -e '\n--- Linux Alert Report ---'
@@ -117,9 +123,26 @@ else
     echo '[FAIL] Missing path block alert!'
 fi
 
+if echo "$ALERTS" | grep -q '"rule_id": "DOM-016"' && echo "$ALERTS" | grep -q 'pastebin'; then
+    echo '[PASS] Domain block alert generated (argv filter, DOM-016)!'
+elif echo "$ALERTS" | grep -q 'pastebin' && echo "$ALERTS" | grep -q 'DOM-'; then
+    echo '[PASS] Domain block alert generated (argv filter)!'
+else
+    echo '[FAIL] Missing domain block alert (must be DOM rule, not BIN - curl is allowed)!'
+fi
+
 echo '[*] Cleanup...'
 kill -9 $MOCK_PID 2>/dev/null || true
-systemctl stop clawedr.service 2>/dev/null || true
+rm -f "$MOCK_SCRIPT" 2>/dev/null || true
+# Restart monitor normally (without CLAWEDR_TARGET_BINARY)
+pkill -f "monitor.py" 2>/dev/null || true
+sleep 2
+rm -f /tmp/clawedr-monitor.pid
+CLAWEDR_POLICY_PATH=/usr/local/share/clawedr/compiled_policy.json \
+  CLAWEDR_BPF_SOURCE=/usr/local/share/clawedr/bpf_hooks.c CLAWEDR_LOG_FILE=/var/log/clawedr_monitor.log \
+  nohup python3 /usr/local/share/clawedr/monitor.py >/dev/null 2>&1 &
+# Truncate block log so dashboard stops showing test alerts
+: > /var/log/clawedr.log 2>/dev/null || true
 ORBEOF
 
 echo -e "${GREEN}Linux Test Complete.${NC}"