Skip to content

Commit 417ce04

Browse files
committed
Add python script to parse debug log output to find discrepancies in size tracker data
1 parent 6103dde commit 417ce04

File tree

1 file changed

+260
-0
lines changed

1 file changed

+260
-0
lines changed

tools/auditSizeTrackerLog.py

Lines changed: 260 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,260 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Analyze cloudfuse size tracker logs to track file sizes and detect discrepancies.
4+
5+
This script processes log files to:
6+
1. Track the size of each file in the filesystem
7+
2. Detect when files are created, modified, renamed, or deleted
8+
3. Identify discrepancies in size calculations
9+
4. Monitor sync operations and epoch changes
10+
"""
11+
12+
import re
13+
import sys
14+
from typing import Dict, Optional
15+
from dataclasses import dataclass
16+
from datetime import datetime
17+
18+
19+
@dataclass
20+
class FileInfo:
21+
"""Information about a file in the filesystem."""
22+
size: Optional[int] # None means unknown size
23+
last_updated: str # Timestamp of last update
24+
25+
def __repr__(self):
26+
size_str = str(self.size) if self.size is not None else "UNKNOWN"
27+
return f"FileInfo(size={size_str}, last_updated={self.last_updated})"
28+
29+
30+
class SizeTrackerAnalyzer:
31+
"""Analyzer for size tracker logs."""
32+
33+
def __init__(self):
34+
self.files: Dict[str, FileInfo] = {}
35+
self.total_delta = 0
36+
self.last_sync_total = None
37+
self.discrepancies = []
38+
self.line_number = 0
39+
self.timestamp = ""
40+
self.epochChanged = False
41+
self.firstSync = True
42+
43+
def parse_timestamp(self, line: str) -> Optional[str]:
44+
"""Extract timestamp from log line."""
45+
match = re.match(r'^(\w+ \w+ \d+ \d+:\d+:\d+\.\d+ \w+ \d+)', line)
46+
return match.group(1) if match else None
47+
48+
def handle_add(self, line: str):
49+
"""Handle SizeTracker::Add log entries."""
50+
match = re.search(r'SizeTracker::Add : (-?\d+)', line)
51+
if match:
52+
delta = int(match.group(1))
53+
self.total_delta += delta
54+
print(f"[{self.line_number}] [{self.timestamp}] Add delta: {delta:+d}, cumulative delta: {self.total_delta:+d}")
55+
56+
def handle_copy_from_file(self, line: str):
57+
"""Handle SizeTracker::CopyFromFile log entries."""
58+
debug_match = re.search(r'SizeTracker::CopyFromFile : (.+?) Add\((.+?)\)', line)
59+
60+
if debug_match:
61+
filepath = debug_match.group(1).strip()
62+
size_info = debug_match.group(2)
63+
64+
# Parse size_info which
65+
# e.g. "84535181-4096" (new size - old size)
66+
if size_info.find("-") < 1:
67+
delta = int(size_info)
68+
# Update file info
69+
if filepath in self.files:
70+
self.files[filepath].size += delta
71+
self.files[filepath].last_updated = self.timestamp
72+
print(f"[{self.line_number}] [{self.timestamp}] CopyFromFile (OLD STYLE): '{filepath}' -> delta={delta:+d}")
73+
return
74+
parts = size_info.split('-')
75+
new_size = int(parts[0])
76+
old_size = int(parts[1])
77+
delta = new_size - old_size
78+
79+
# Check if file exists and if the old size matches
80+
if filepath in self.files:
81+
tracked_size = self.files[filepath].size
82+
if tracked_size is not None and tracked_size != old_size:
83+
discrepancy = f"[{self.line_number}] [{self.timestamp}] CopyFromFile size mismatch for '{filepath}': expected old_size={tracked_size}, got old_size={old_size}"
84+
print(f" ⚠️ {discrepancy}")
85+
self.discrepancies.append(discrepancy)
86+
87+
# Update file info
88+
self.files[filepath] = FileInfo(size=new_size, last_updated=self.timestamp)
89+
print(f"[{self.line_number}] [{self.timestamp}] CopyFromFile: '{filepath}' -> size={new_size} (delta={delta:+d})")
90+
91+
def handle_delete_file(self, line: str):
92+
"""Handle SizeTracker::DeleteFile log entries."""
93+
debug_match = re.search(r'SizeTracker::DeleteFile : (.+?) Add\((-\d+)\)', line)
94+
95+
if debug_match:
96+
filepath = debug_match.group(1).strip()
97+
delta = int(debug_match.group(2))
98+
99+
# Check if file exists
100+
if filepath in self.files:
101+
tracked_size = self.files[filepath].size
102+
if tracked_size is not None and delta != -tracked_size:
103+
discrepancy = f"[{self.line_number}] [{self.timestamp}] DeleteFile size mismatch for '{filepath}': expected delta={-tracked_size}, got delta={delta}"
104+
print(f" ⚠️ {discrepancy}")
105+
self.discrepancies.append(discrepancy)
106+
107+
# Remove from tracking
108+
del self.files[filepath]
109+
print(f"[{self.line_number}] [{self.timestamp}] DeleteFile: '{filepath}' (delta={delta:+d})")
110+
else:
111+
print(f"[{self.line_number}] [{self.timestamp}] DeleteFile: '{filepath}' (delta={delta:+d}) [file not tracked]")
112+
113+
def handle_rename_file(self, line: str):
114+
"""Handle SizeTracker::RenameFile log entries."""
115+
match = re.search(r'SizeTracker::RenameFile : (.+?)->(.+)$', line)
116+
if match:
117+
src = match.group(1).strip()
118+
dst = match.group(2).strip()
119+
120+
# Transfer file info from src to dst
121+
if src in self.files:
122+
self.files[dst] = self.files[src]
123+
del self.files[src]
124+
print(f"[{self.line_number}] [{self.timestamp}] RenameFile: '{src}' -> '{dst}'")
125+
else:
126+
# File not tracked, but still record the rename
127+
self.files[dst] = FileInfo(size=None, last_updated=self.timestamp)
128+
print(f"[{self.line_number}] [{self.timestamp}] RenameFile: '{src}' -> '{dst}' [src not tracked]")
129+
130+
def handle_sync(self, line: str):
131+
"""Handle SizeTracker::sync log entries."""
132+
# Check for epoch change
133+
epoch_match = re.search(r'epoch changed \(local=(\d+) -> file=(\d+)\) — discarding delta (-?\d+)', line)
134+
if epoch_match:
135+
local_epoch = int(epoch_match.group(1))
136+
file_epoch = int(epoch_match.group(2))
137+
discarded_delta = int(epoch_match.group(3))
138+
print(f"\n{'='*80}")
139+
print(f"[{self.line_number}] [{self.timestamp}] 🔄 EPOCH CHANGE: local={local_epoch} -> file={file_epoch}, discarding delta={discarded_delta:+d}")
140+
if self.total_delta != discarded_delta:
141+
discrepancy = f"[{self.line_number}] [{self.timestamp}] Delta mismatch: expected delta={self.total_delta}. Discarded delta is off by {discarded_delta-self.total_delta}!"
142+
print(f" ⚠️ {discrepancy}")
143+
self.discrepancies.append(discrepancy)
144+
print(f"{'='*80}\n")
145+
self.total_delta = 0 # Reset delta after epoch change
146+
self.epochChanged = True
147+
self.firstSync = False
148+
return
149+
150+
# Normal sync pattern: "old_total + delta = new_total"
151+
sync_match = re.search(r'SizeTracker::sync : (\d+) \+ (-?\d+) = (\d+)', line)
152+
if sync_match:
153+
old_total = int(sync_match.group(1))
154+
delta = int(sync_match.group(2))
155+
new_total = int(sync_match.group(3))
156+
157+
print(f"\n{'='*80}")
158+
print(f"[{self.line_number}] [{self.timestamp}] 📊 SYNC: {old_total} + {delta:+d} = {new_total}")
159+
160+
# Verify the old total is correct
161+
if self.last_sync_total is not None and old_total != self.last_sync_total:
162+
epochDisclaimer = ""
163+
if self.epochChanged:
164+
epochDisclaimer = "(updated by audit)"
165+
self.epochChanged = False
166+
discrepancy = f"[{self.line_number}] [{self.timestamp}] Sync size mismatch: {old_total} != {self.last_sync_total} (off by {old_total - self.last_sync_total})! {epochDisclaimer}"
167+
print(f" ⚠️ {discrepancy}")
168+
self.discrepancies.append(discrepancy)
169+
170+
# Compare with tracked delta
171+
if not self.firstSync and self.total_delta != delta:
172+
discrepancy = f"[{self.line_number}] [{self.timestamp}] Sync delta mismatch: tracked={self.total_delta:+d}, sync={delta:+d}, difference={self.total_delta - delta:+d}"
173+
print(f" ⚠️ {discrepancy}")
174+
self.discrepancies.append(discrepancy)
175+
176+
print(f"{'='*80}\n")
177+
178+
# Reset for next sync period
179+
self.last_sync_total = new_total
180+
self.total_delta = 0
181+
self.firstSync = False
182+
183+
def process_line(self, line: str):
184+
"""Process a single log line."""
185+
self.line_number += 1
186+
187+
self.timestamp = self.parse_timestamp(line)
188+
if not self.timestamp:
189+
return
190+
191+
# Route to appropriate handler based on log message
192+
if 'SizeTracker::Add :' in line and 'journal.go' in line:
193+
self.handle_add(line)
194+
elif 'SizeTracker::CopyFromFile' in line:
195+
self.handle_copy_from_file(line)
196+
elif 'SizeTracker::DeleteFile' in line:
197+
self.handle_delete_file(line)
198+
elif 'SizeTracker::RenameFile' in line:
199+
self.handle_rename_file(line)
200+
elif 'SizeTracker::sync' in line:
201+
self.handle_sync(line)
202+
203+
def print_summary(self):
204+
"""Print summary of analysis."""
205+
print("\n" + "="*80)
206+
print("ANALYSIS SUMMARY")
207+
print("="*80)
208+
print(f"Total lines processed: {self.line_number}")
209+
print(f"Total files tracked: {len(self.files)}")
210+
print(f"Pending delta (since last sync): {self.total_delta:+d}")
211+
212+
if self.discrepancies:
213+
print(f"\n⚠️ Found {len(self.discrepancies)} discrepancies:")
214+
for i, discrepancy in enumerate(self.discrepancies, 1):
215+
print(f" {i}. {discrepancy}")
216+
else:
217+
print("\n✅ No discrepancies found!")
218+
219+
# Show some file statistics
220+
known_size_files = [f for f in self.files.values() if f.size is not None]
221+
unknown_size_files = [f for f in self.files.values() if f.size is None]
222+
223+
print(f"\nFiles with known size: {len(known_size_files)}")
224+
print(f"Files with unknown size: {len(unknown_size_files)}")
225+
226+
if known_size_files:
227+
total_size = sum(f.size for f in known_size_files)
228+
print(f"Total size of tracked files: {total_size:,} bytes ({total_size / (1024**3):.2f} GB)")
229+
230+
231+
def main():
232+
"""Main entry point."""
233+
if len(sys.argv) < 2:
234+
print(f"Usage: {sys.argv[0]} <log_file>")
235+
print(f"Example: {sys.argv[0]} combined.log")
236+
sys.exit(1)
237+
238+
log_file = sys.argv[1]
239+
240+
print(f"Analyzing log file: {log_file}")
241+
print("="*80 + "\n")
242+
243+
analyzer = SizeTrackerAnalyzer()
244+
245+
try:
246+
with open(log_file, 'r', encoding='utf-8', errors='ignore') as f:
247+
for line in f:
248+
analyzer.process_line(line.rstrip('\n'))
249+
except FileNotFoundError:
250+
print(f"Error: File '{log_file}' not found")
251+
sys.exit(1)
252+
except Exception as e:
253+
print(f"Error processing log file: {e}")
254+
sys.exit(1)
255+
256+
analyzer.print_summary()
257+
258+
259+
if __name__ == "__main__":
260+
main()

0 commit comments

Comments
 (0)