"""
Real integration tests for repository cleanup using actual file operations.

No mocks: the tests create, move, hash and delete real files on disk.
"""
5+
6+ import pytest
7+ import tempfile
8+ import shutil
9+ import hashlib
10+ import sys
11+ import os
12+ from pathlib import Path
13+
14+ # Add parent directory to path for imports
15+ sys .path .insert (0 , str (Path (__file__ ).parent .parent ))
16+
17+ from quantum_conversations import ParticleFilter , TokenSequenceVisualizer
18+ from scripts .cleanup_repository import RepositoryCleanup
19+
20+
@pytest.fixture
def real_temp_directory():
    """Yield a freshly created temporary directory as a ``Path``.

    The directory is created with ``tempfile.mkdtemp`` before the test body
    runs and is removed recursively with ``shutil.rmtree`` afterwards.
    """
    scratch_root = tempfile.mkdtemp()
    scratch_path = Path(scratch_root)
    yield scratch_path
    # Teardown: remove the directory tree and whatever the test left in it.
    shutil.rmtree(scratch_root)
28+
29+
class TestRealFileOperations:
    """Exercise file moves, duplicate detection and the cleanup script on disk.

    Every test works inside the ``real_temp_directory`` fixture; nothing is
    mocked.
    """

    # The 8-byte PNG file signature, used as a prefix for fake image payloads.
    PNG_SIGNATURE = b'\x89PNG\r\n\x1a\n'

    def test_real_output_movement(self, real_temp_directory):
        """Move a file into a subdirectory and check that it arrives intact."""
        payload = self.PNG_SIGNATURE + b'TEST_DATA'
        test_file = real_temp_directory / 'test_output.png'
        test_file.write_bytes(payload)

        dest_dir = real_temp_directory / 'derivatives'
        dest_dir.mkdir()
        moved_file = dest_dir / test_file.name

        shutil.move(str(test_file), str(moved_file))

        # The source must be gone and the destination must hold the same bytes.
        assert not test_file.exists()
        assert moved_file.exists()
        assert moved_file.read_bytes() == payload

    def test_duplicate_detection_real_files(self, real_temp_directory):
        """Two files written with the same bytes must have equal SHA-256 digests."""
        content = self.PNG_SIGNATURE + b'SAME_CONTENT'
        file1 = real_temp_directory / 'file1.png'
        file2 = real_temp_directory / 'file2.png'
        file1.write_bytes(content)
        file2.write_bytes(content)

        hash1 = hashlib.sha256(file1.read_bytes()).hexdigest()
        hash2 = hashlib.sha256(file2.read_bytes()).hexdigest()

        assert hash1 == hash2

    def test_cleanup_script_dry_run(self, real_temp_directory):
        """Run ``RepositoryCleanup`` with ``dry_run=True`` and expect no changes on disk."""
        code_dir = real_temp_directory / 'code'
        code_dir.mkdir()

        test_output = code_dir / 'test_output.png'
        test_output.write_bytes(self.PNG_SIGNATURE + b'TEST')

        demo_script = code_dir / 'demo_test.py'
        demo_script.write_text('print("test")')

        cache_dir = code_dir / '__pycache__'
        cache_dir.mkdir()
        cache_file = cache_dir / 'test.pyc'
        cache_file.write_bytes(b'PYCODE')

        cleanup = RepositoryCleanup(base_path=code_dir, dry_run=True)
        stats = cleanup.run()

        # A dry run must leave every file and directory where it was.
        assert test_output.exists()
        assert demo_script.exists()
        assert cache_dir.exists()
        assert cache_file.exists()

        # The returned stats are still expected to report the planned work.
        assert stats['files_moved'] > 0 or stats['files_deleted'] > 0

    def test_cleanup_script_real_execution(self, real_temp_directory):
        """Run ``RepositoryCleanup`` with ``dry_run=False`` and check the resulting layout."""
        code_dir = real_temp_directory / 'code'
        code_dir.mkdir()

        # Destination tree, created as a sibling of the code directory.
        data_dir = real_temp_directory / 'data' / 'derivatives'
        data_dir.mkdir(parents=True)

        test_output = code_dir / 'test_output.png'
        test_output.write_bytes(self.PNG_SIGNATURE + b'TEST')

        cache_dir = code_dir / '__pycache__'
        cache_dir.mkdir()
        cache_file = cache_dir / 'test.pyc'
        cache_file.write_bytes(b'PYCODE')

        cleanup = RepositoryCleanup(base_path=code_dir, dry_run=False)
        stats = cleanup.run()

        # The output image is expected to be moved and the cache directory removed.
        assert not test_output.exists()
        assert not cache_dir.exists()

        # The image is expected under data/derivatives/test_outputs with its content intact.
        moved_file = data_dir / 'test_outputs' / 'test_output.png'
        assert moved_file.exists()
        assert moved_file.read_bytes().startswith(self.PNG_SIGNATURE)

        assert stats['files_moved'] >= 1
        assert stats['directories_deleted'] >= 1
132+
133+
class TestRealModelLoading:
    """Check that model loading, generation and plotting work end to end.

    These tests do not run the cleanup themselves; they load a real model on
    the CPU and exercise ``ParticleFilter`` and ``TokenSequenceVisualizer``.
    """

    # Model used by both tests.
    MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

    # The 8-byte PNG file signature every PNG file starts with.
    PNG_SIGNATURE = b'\x89PNG\r\n\x1a\n'

    def test_cleanup_preserves_model_functionality(self):
        """Load the model, generate with two particles and check the output."""
        pf = ParticleFilter(
            model_name=self.MODEL_NAME,
            n_particles=2,
            device="cpu",
        )

        pf.initialize("Test prompt")
        particles = pf.generate(n_steps=10)  # n_steps, not max_length

        # One result per particle, each holding at least one token.
        assert len(particles) == 2
        assert all(len(p.tokens) > 0 for p in particles)

    def test_visualizer_after_cleanup(self):
        """Render a bump plot to a temporary file and check that it is a PNG."""
        pf = ParticleFilter(
            model_name=self.MODEL_NAME,
            n_particles=3,
            device="cpu",
        )
        pf.initialize("Hello")
        particles = pf.generate(n_steps=5)  # n_steps, not max_length

        visualizer = TokenSequenceVisualizer(pf.tokenizer)

        # Reserve a file name only; the handle is closed before the plot is written.
        with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
            output_path = Path(tmp.name)

        try:
            visualizer.visualize_bumplot(particles, output_path)

            assert output_path.exists()
            assert output_path.stat().st_size > 0

            # Compare exactly as many leading bytes as the signature is long.
            header = output_path.read_bytes()[:len(self.PNG_SIGNATURE)]
            assert header == self.PNG_SIGNATURE
        finally:
            # delete=False above means the file must be removed by hand.
            if output_path.exists():
                output_path.unlink()
186+
187+
class TestRealGitignoreUpdates:
    """Check ``.gitignore`` patterns against a real git repository."""

    def test_gitignore_patterns_work(self, real_temp_directory):
        """Verify that git reports every sample file as ignored.

        A throwaway repository is initialised inside the temporary directory,
        and ``git check-ignore`` is asked about each sample path. The test is
        skipped when no ``git`` executable can be started.
        """
        import subprocess

        repo_dir = real_temp_directory / 'repo'
        repo_dir.mkdir()

        # Patterns start at column 0: leading whitespace would become part of
        # the pattern.
        gitignore = repo_dir / '.gitignore'
        gitignore.write_text(
            "\n"
            "# Test patterns\n"
            "*.pyc\n"
            "__pycache__/\n"
            "*.png\n"
            "test_*/\n"
        )

        ignored_files = [
            repo_dir / 'test.pyc',
            repo_dir / 'output.png',
            repo_dir / '__pycache__' / 'cache.pyc',
            repo_dir / 'test_dir' / 'file.txt',
        ]

        for file_path in ignored_files:
            file_path.parent.mkdir(parents=True, exist_ok=True)
            file_path.write_text('ignored')

        # Forward-slash relative paths, so git's output can be compared verbatim.
        relative_paths = [
            file_path.relative_to(repo_dir).as_posix() for file_path in ignored_files
        ]

        # Pass cwd= instead of calling os.chdir(): the process-wide working
        # directory must not point into a directory the fixture deletes.
        try:
            subprocess.run(
                ['git', 'init'],
                cwd=repo_dir,
                capture_output=True,
                check=True,
            )
        except FileNotFoundError:
            pytest.skip("git executable not available")

        result = subprocess.run(
            ['git', 'check-ignore', *relative_paths],
            cwd=repo_dir,
            capture_output=True,
            text=True,
        )

        # Exit code 0 only means that at least one path is ignored, so the
        # reported paths are compared with the full expected set as well.
        assert result.returncode == 0, result.stderr
        assert set(result.stdout.splitlines()) == set(relative_paths)
235+
236+
class TestRealFileHashing:
    """SHA-256 hashing checks performed on files written to disk."""

    @staticmethod
    def _chunked_sha256(path):
        """Return the SHA-256 hex digest of ``path``, read in 8192-byte chunks."""
        digest = hashlib.sha256()
        with open(path, 'rb') as stream:
            # iter() with a sentinel stops at the first empty read (end of file).
            for block in iter(lambda: stream.read(8192), b''):
                digest.update(block)
        return digest.hexdigest()

    def test_hash_large_file(self, real_temp_directory):
        """Hash a 1 MiB random file and an identical copy; digests must match."""
        payload = os.urandom(1024 * 1024)

        large_file = real_temp_directory / 'large.bin'
        large_file.write_bytes(payload)
        hash_result = self._chunked_sha256(large_file)

        # A SHA-256 hex digest is 64 characters long.
        assert len(hash_result) == 64

        copy_file = real_temp_directory / 'copy.bin'
        copy_file.write_bytes(payload)

        assert self._chunked_sha256(copy_file) == hash_result

    def test_detect_different_files(self, real_temp_directory):
        """Files with different content must produce different digests."""
        samples = (('file1.txt', 'Content 1'), ('file2.txt', 'Content 2'))

        digests = []
        for file_name, text in samples:
            target = real_temp_directory / file_name
            target.write_text(text)
            digests.append(hashlib.sha256(target.read_bytes()).hexdigest())

        first_digest, second_digest = digests
        assert first_digest != second_digest
281+
282+
class TestRealDirectoryOperations:
    """Directory-level checks run against the real filesystem."""

    def test_recursive_directory_size(self, real_temp_directory):
        """Sum file sizes across a three-level tree and expect 600 bytes."""
        top = real_temp_directory / 'root'
        middle = top / 'subdir1'
        bottom = middle / 'subdir2'
        # Creating the deepest directory with parents=True builds all three levels.
        bottom.mkdir(parents=True)

        # One file per level: 100, 200 and 300 single-byte characters.
        (top / 'file1.txt').write_text('x' * 100)
        (middle / 'file2.txt').write_text('x' * 200)
        (bottom / 'file3.txt').write_text('x' * 300)

        total_size = sum(
            entry.stat().st_size for entry in top.rglob('*') if entry.is_file()
        )

        assert total_size == 600  # 100 + 200 + 300

    def test_directory_merge(self, real_temp_directory):
        """Move every entry of one directory into another and check both sides."""
        source_dir = real_temp_directory / 'src'
        target_dir = real_temp_directory / 'dest'
        source_dir.mkdir()
        target_dir.mkdir()

        (source_dir / 'file1.txt').write_text('Source 1')
        (target_dir / 'file2.txt').write_text('Dest 2')

        # Merge by moving each source entry to the same name in the target.
        for entry in source_dir.iterdir():
            shutil.move(str(entry), str(target_dir / entry.name))

        moved = target_dir / 'file1.txt'
        assert not (source_dir / 'file1.txt').exists()
        assert moved.exists()
        assert (target_dir / 'file2.txt').exists()
        assert moved.read_text() == 'Source 1'
0 commit comments