1+ #!/usr/bin/env python3
2+ """
3+ Huntarr Log Spam Monitor
4+ Monitors Docker logs for excessive spam messages and duplicate timestamps
5+ """
6+
7+ import subprocess
8+ import time
9+ import re
10+ from collections import defaultdict , Counter
11+ from datetime import datetime , timedelta
12+ import sys
13+
class LogSpamMonitor:
    """Scan Huntarr's Docker logs for repeated messages and crowded timestamps.

    A message is reported as spam when it occurs more than ``spam_threshold``
    times in the analyzed lines. A timestamp (one-second resolution) is
    reported when more than ``duplicate_threshold`` lines share it.
    """

    # Matches e.g. "2025-06-13 05:08:14 UTC - huntarr - LEVEL - message".
    # The lazy ``.*?`` stops at the first "- " after "UTC", so for that format
    # the captured text still carries the "huntarr - LEVEL - " prefix.
    _LINE_PATTERN = re.compile(
        r'(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) UTC.*?- (.*)'
    )

    # Variable fragments are masked so otherwise identical messages are
    # counted together. Compiled once instead of on every log line.
    _REDACTIONS = (
        (re.compile(r'session_id: [a-f0-9]+'), 'session_id: [REDACTED]'),
        (re.compile(r'IP address: [\d\.]+'), 'IP address: [REDACTED]'),
        (re.compile(r'path \'[^\']+\''), 'path [REDACTED]'),
    )

    # Longest message excerpt shown in a report line.
    _MAX_MESSAGE_WIDTH = 100

    def __init__(self):
        self.message_counts = Counter()
        self.timestamp_counts = Counter()
        # recent_messages and time_window are not read by any method of this
        # class; they are kept so existing attribute access keeps working.
        self.recent_messages = []
        self.spam_threshold = 5  # Messages repeated more than this are considered spam
        self.time_window = 60  # Monitor last 60 seconds
        self.duplicate_threshold = 2  # Same timestamp appearing more than this is suspicious

    @staticmethod
    def _split_log_output(output):
        """Split raw command output into lines; blank output yields []."""
        # str.split on an empty string returns [''], which is truthy and was
        # treated as one retrieved line. splitlines() returns [] instead.
        return output.strip().splitlines()

    @classmethod
    def _preview(cls, message):
        """Return *message* cut to the report width, marked only if cut."""
        if len(message) <= cls._MAX_MESSAGE_WIDTH:
            return message
        return f"{message[:cls._MAX_MESSAGE_WIDTH]} ..."

    def extract_timestamp_and_message(self, log_line):
        """Extract the timestamp and the normalized message from a log line.

        Returns:
            ``(timestamp_str, message)`` when the line contains a
            ``YYYY-MM-DD HH:MM:SS UTC`` timestamp followed by ``- ``,
            otherwise ``(None, None)``. Session ids, IP addresses and quoted
            paths in the message are replaced by ``[REDACTED]`` placeholders.
        """
        match = self._LINE_PATTERN.search(log_line)
        if match is None:
            return None, None

        timestamp_str = match.group(1)
        message = match.group(2).strip()
        for pattern, replacement in self._REDACTIONS:
            message = pattern.sub(replacement, message)

        return timestamp_str, message

    def analyze_logs(self, lines):
        """Count messages and timestamps in *lines* and report the outliers.

        The counters on the instance are reset on every call, so the result
        describes only the lines passed in.

        Returns:
            ``(spam_detected, duplicate_timestamps)``: two lists of dicts with
            the keys ``message``/``timestamp``, ``count`` and ``type``.
        """
        self.message_counts.clear()
        self.timestamp_counts.clear()

        for line in lines:
            timestamp_str, message = self.extract_timestamp_and_message(line)
            # Lines without a timestamp, or with an empty message, are ignored.
            if timestamp_str and message:
                self.message_counts[message] += 1
                self.timestamp_counts[timestamp_str] += 1

        spam_detected = [
            {'message': message, 'count': count, 'type': 'repeated_message'}
            for message, count in self.message_counts.items()
            if count > self.spam_threshold
        ]
        duplicate_timestamps = [
            {'timestamp': timestamp, 'count': count, 'type': 'duplicate_timestamp'}
            for timestamp, count in self.timestamp_counts.items()
            if count > self.duplicate_threshold
        ]

        return spam_detected, duplicate_timestamps

    def get_recent_logs(self, tail_lines=100):
        """Return the last *tail_lines* log lines of the ``huntarr`` service.

        Runs ``docker-compose logs`` with a 30 second timeout. Any failure
        (timeout, non-zero exit status, other exception) is printed and an
        empty list is returned; empty output also yields an empty list.
        """
        try:
            result = subprocess.run(
                ['docker-compose', 'logs', 'huntarr', '--tail', str(tail_lines)],
                capture_output=True,
                text=True,
                timeout=30,
            )
        except subprocess.TimeoutExpired:
            print("Timeout getting logs")
            return []
        except Exception as e:
            # Deliberately broad: fetching logs is best-effort and must not
            # take the monitor down (e.g. docker-compose is not installed).
            print(f"Exception getting logs: {e} ")
            return []

        if result.returncode != 0:
            print(f"Error getting logs: {result.stderr} ")
            return []

        return self._split_log_output(result.stdout)

    def print_report(self, spam_detected, duplicate_timestamps):
        """Print a formatted report of the findings from ``analyze_logs``."""
        print(f"\n {'=' * 80} ")
        print(f"LOG SPAM MONITOR REPORT - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} ")
        print(f"{'=' * 80} ")

        if spam_detected:
            print(f"\n 🚨 SPAM MESSAGES DETECTED ({len(spam_detected)} types):")
            print("-" * 60)
            for spam in spam_detected:
                print(f" Count: {spam['count']:3d} | Message: {self._preview(spam['message'])}")

        if duplicate_timestamps:
            print(f"\n ⚠️ DUPLICATE TIMESTAMPS DETECTED ({len(duplicate_timestamps)} timestamps):")
            print("-" * 60)
            for dup in duplicate_timestamps:
                print(f" Count: {dup['count']:3d} | Timestamp: {dup['timestamp']} ")

        if not spam_detected and not duplicate_timestamps:
            print("\n ✅ NO SPAM OR DUPLICATE TIMESTAMPS DETECTED")
            print(" Logs appear to be clean!")

        print(f"\n Thresholds: Spam > {self.spam_threshold} messages, Duplicates > {self.duplicate_threshold} timestamps")
        print(f"{'=' * 80} \n ")

    def monitor_continuously(self, interval=30):
        """Check the last 200 log lines every *interval* seconds until Ctrl+C.

        A full report is printed only when something is detected; otherwise a
        one-line status is printed. Nothing is printed for a cycle in which no
        lines could be retrieved.
        """
        print(f"🔍 Starting continuous log monitoring (checking every {interval} seconds)")
        print(f" Spam threshold: {self.spam_threshold} repeated messages")
        print(f" Duplicate threshold: {self.duplicate_threshold} same timestamps")
        print(" Press Ctrl+C to stop\n ")

        try:
            while True:
                lines = self.get_recent_logs(tail_lines=200)
                if lines:
                    spam_detected, duplicate_timestamps = self.analyze_logs(lines)

                    if spam_detected or duplicate_timestamps:
                        self.print_report(spam_detected, duplicate_timestamps)
                    else:
                        print(f"✅ {datetime.now().strftime('%H:%M:%S')} - Logs clean (checked {len(lines)} lines)")

                time.sleep(interval)

        except KeyboardInterrupt:
            print("\n \n 🛑 Monitoring stopped by user")
        except Exception as e:
            # Top-level boundary of the monitoring loop: report and stop.
            print(f"\n ❌ Error during monitoring: {e} ")

    def single_check(self):
        """Analyze the last 200 log lines once and print the report.

        Returns:
            ``True`` when spam or duplicate timestamps were found, ``False``
            when the logs are clean or no lines could be retrieved.
        """
        print("🔍 Performing single log spam check...")
        lines = self.get_recent_logs(tail_lines=200)

        if not lines:
            print("❌ Could not retrieve logs")
            return False

        spam_detected, duplicate_timestamps = self.analyze_logs(lines)
        self.print_report(spam_detected, duplicate_timestamps)

        # True if issues were found
        return bool(spam_detected or duplicate_timestamps)
172+
def main():
    """Command-line entry point.

    ``--continuous [SECONDS]`` monitors until interrupted, checking every
    SECONDS seconds (default 30). Without arguments a single check is run and
    the process exits with status 1 when issues were found, 0 otherwise.
    """
    default_interval = 30
    monitor = LogSpamMonitor()

    if len(sys.argv) > 1 and sys.argv[1] == '--continuous':
        # Continuous monitoring mode
        interval = default_interval
        if len(sys.argv) > 2:
            try:
                requested = int(sys.argv[2])
            except ValueError:
                requested = None

            # A zero interval would busy-loop and a negative one makes
            # time.sleep raise, so both are rejected like non-numeric input.
            if requested is not None and requested > 0:
                interval = requested
            else:
                print("Invalid interval, using default 30 seconds")

        monitor.monitor_continuously(interval)
    else:
        # Single check mode
        issues_found = monitor.single_check()
        sys.exit(1 if issues_found else 0)
190+
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
0 commit comments