Skip to content

Commit 0c99fd6

Browse files
feat: log project hash on changes
1 parent 2852741 commit 0c99fd6

File tree

2 files changed

+231
-2
lines changed

2 files changed

+231
-2
lines changed

src/writer/app_runner.py

Lines changed: 76 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import asyncio
22
import concurrent.futures
3+
import hashlib
34
import importlib.util
45
import io
56
import logging
@@ -8,6 +9,7 @@
89
import multiprocessing.connection
910
import multiprocessing.synchronize
1011
import os
12+
import pathlib
1113
import shutil
1214
import signal
1315
import subprocess
@@ -16,7 +18,7 @@
1618
import threading
1719
import zipfile
1820
from types import ModuleType
19-
from typing import Any, Callable, Dict, List, Optional, Union, cast
21+
from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast
2022

2123
import watchdog.events
2224
from pydantic import ValidationError
@@ -691,6 +693,73 @@ def on_any_event(self, event) -> None:
691693
return
692694
self.update_callback()
693695

696+
class ProjectHashLogHandler(watchdog.events.PatternMatchingEventHandler):
    """Logs an MD5-based hash of the whole project whenever its files change.

    A per-file MD5 digest is kept in ``wf_project_context.file_hashes``; the
    project hash is the MD5 of those digests taken in sorted-filename order,
    so it does not depend on the order in which files were (re)hashed.
    """

    def __init__(self, app_path: str, wf_project_context: WfProjectContext, patterns: List[str]):
        super().__init__(patterns=patterns)
        self.wf_project_context = wf_project_context

        # Seed the per-file digests from the current on-disk state.
        # NOTE(review): these keys are absolute paths, while the event
        # handlers key by the event's src/dest path as reported by watchdog —
        # confirm the observer is scheduled with an absolute app_path so both
        # key forms agree.
        for file in pathlib.Path(app_path).rglob("*"):
            if file.is_dir():
                continue
            digest = self._hash_file(file)
            if digest is not None:  # file may vanish mid-scan; skip it
                self.wf_project_context.file_hashes[str(file.absolute())] = digest

        self.project_hash = ""
        self._log_hash()

    @staticmethod
    def _hash_file(file_path) -> Optional[str]:
        """Return the MD5 hex digest of file_path, or None if it no longer exists."""
        file_hash = hashlib.md5()
        try:
            with open(file_path, 'rb') as f:
                # Read in 8 KiB chunks to keep memory flat on large files.
                while chunk := f.read(8192):
                    file_hash.update(chunk)
        except FileNotFoundError:
            return None
        return file_hash.hexdigest()

    def _calculate_project_hash(self) -> str:
        """Combine all per-file digests (sorted by filename) into one MD5 digest."""
        project_hash = hashlib.md5()
        for filename in sorted(self.wf_project_context.file_hashes.keys()):
            file_hash = self.wf_project_context.file_hashes[filename]
            project_hash.update(bytes.fromhex(file_hash))
        return project_hash.hexdigest()

    def _process_file(self, file_path: str) -> None:
        """Re-hash a single file and log the project hash if it changed."""
        try:
            digest = self._hash_file(file_path)
            if digest is None:
                # File disappeared between the event and the read; the
                # corresponding delete event will clean up the entry.
                return
            self.wf_project_context.file_hashes[file_path] = digest
        except Exception as e:
            logging.warning(f"Failed to hash {file_path}: {e}")
        self._log_hash()

    def _log_hash(self) -> None:
        """Recompute the project hash and log it when it differs from the previous one."""
        previous_project_hash = self.project_hash
        self.project_hash = self._calculate_project_hash()
        if previous_project_hash != self.project_hash:
            logging.debug(f"Project hash: {self.project_hash}")

    def on_modified(self, event: Union[watchdog.events.DirModifiedEvent, watchdog.events.FileModifiedEvent]):
        if not event.is_directory:
            self._process_file(event.src_path)

    def on_created(self, event: Union[watchdog.events.DirCreatedEvent, watchdog.events.FileCreatedEvent]):
        if not event.is_directory:
            self._process_file(event.src_path)
        else:
            # A created directory arrives as a single event; expand it into
            # per-file created events so every contained file gets hashed.
            for sub_event in watchdog.events.generate_sub_created_events(event.src_path):
                self.on_created(sub_event)

    def on_moved(self, event: Union[watchdog.events.DirMovedEvent, watchdog.events.FileMovedEvent]):
        if not event.is_directory:
            self.wf_project_context.file_hashes.pop(event.src_path, None)
            self._process_file(event.dest_path)
        else:
            # BUG FIX: generate_sub_moved_events requires both the old and the
            # new directory path; the original call passed only src_path,
            # which raised TypeError on every directory move.
            for sub_event in watchdog.events.generate_sub_moved_events(event.src_path, event.dest_path):
                self.on_moved(sub_event)

    def on_deleted(self, event: Union[watchdog.events.DirDeletedEvent, watchdog.events.FileDeletedEvent]):
        if not event.is_directory:
            self.wf_project_context.file_hashes.pop(event.src_path, None)
            self._log_hash()
762+
694763

695764
class ThreadSafeAsyncEvent(asyncio.Event):
696765
"""Asyncio event adapted to be thread-safe."""
@@ -824,6 +893,12 @@ def _start_fs_observer(self):
824893
path=self.app_path,
825894
recursive=True,
826895
)
896+
if logging.getLogger().isEnabledFor(logging.DEBUG):
897+
self.observer.schedule(
898+
ProjectHashLogHandler(self.app_path, self.wf_project_context, patterns=["*"]),
899+
path=self.app_path,
900+
recursive=True,
901+
)
827902
# See _install_requirements docstring for info
828903
# self.observer.schedule(
829904
# FileEventHandler(self._install_requirements, patterns=["requirements.txt"]),

tests/backend/test_app_runner.py

Lines changed: 155 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
import asyncio
2+
import hashlib
23
import threading
4+
from types import SimpleNamespace
35

46
import pytest
5-
from writer.app_runner import AppRunner
7+
import watchdog.events
8+
from writer.app_runner import AppRunner, ProjectHashLogHandler
69
from writer.ss_types import (
710
EventRequest,
811
InitSessionRequest,
@@ -345,3 +348,154 @@ async def test_handle_event_should_return_result_of_event_handler_execution(
345348

346349
# Then
347350
assert res.payload.result["result"] is not None
351+
352+
353+
354+
@pytest.fixture
def wf_project_context():
    """Minimal stand-in for WfProjectContext, exposing only file_hashes."""
    context = SimpleNamespace()
    context.file_hashes = {}
    return context
357+
358+
359+
@pytest.fixture
def sample_app(tmp_path):
    """
    Build and return a small app directory:
    app/
        a.txt
        sub/
            b.txt
    """
    app_dir = tmp_path / "app"
    sub_dir = app_dir / "sub"
    # Creating the nested dir with parents=True also creates app_dir itself.
    sub_dir.mkdir(parents=True)

    (app_dir / "a.txt").write_text("hello")
    (sub_dir / "b.txt").write_text("world")

    return app_dir
377+
378+
379+
class TestProjectHashLogHandler:
    """Unit tests for ProjectHashLogHandler's hashing and event handling."""

    @staticmethod
    def md5_of_bytes(data: bytes) -> str:
        """MD5 hex digest of a byte string, for expected-value construction."""
        return hashlib.md5(data).hexdigest()

    @staticmethod
    def _make_handler(app_dir, context):
        # Every test constructs the handler identically; keep it in one place.
        return ProjectHashLogHandler(
            app_path=str(app_dir),
            wf_project_context=context,
            patterns=["*"],
        )

    def test_initial_hashing(self, sample_app, wf_project_context):
        handler = self._make_handler(sample_app, wf_project_context)

        expected = {
            str((sample_app / "a.txt").absolute()): self.md5_of_bytes(b"hello"),
            str((sample_app / "sub" / "b.txt").absolute()): self.md5_of_bytes(b"world"),
        }

        assert wf_project_context.file_hashes == expected
        assert handler.project_hash == handler._calculate_project_hash()

    def test_project_hash_is_order_independent(self, sample_app, wf_project_context):
        handler = self._make_handler(sample_app, wf_project_context)
        baseline = handler.project_hash

        # Rebuild the hash map with its entries in reverse insertion order.
        reversed_items = list(reversed(list(wf_project_context.file_hashes.items())))
        wf_project_context.file_hashes.clear()
        wf_project_context.file_hashes.update(reversed_items)

        assert handler._calculate_project_hash() == baseline

    def test_on_modified_updates_file_hash(self, sample_app, wf_project_context):
        handler = self._make_handler(sample_app, wf_project_context)

        target = sample_app / "a.txt"
        target.write_text("changed")

        handler.on_modified(watchdog.events.FileModifiedEvent(str(target)))

        assert wf_project_context.file_hashes[str(target)] == self.md5_of_bytes(b"changed")

    def test_on_created_adds_file(self, sample_app, wf_project_context):
        handler = self._make_handler(sample_app, wf_project_context)

        created = sample_app / "new.txt"
        created.write_text("new")

        handler.on_created(watchdog.events.FileCreatedEvent(str(created)))

        # .get() covers both presence and value in a single check.
        assert wf_project_context.file_hashes.get(str(created)) == self.md5_of_bytes(b"new")

    def test_on_deleted_removes_file(self, sample_app, wf_project_context):
        handler = self._make_handler(sample_app, wf_project_context)

        gone = sample_app / "a.txt"
        handler.on_deleted(watchdog.events.FileDeletedEvent(str(gone)))

        assert str(gone) not in wf_project_context.file_hashes

    def test_on_moved_updates_hashes(self, sample_app, wf_project_context):
        handler = self._make_handler(sample_app, wf_project_context)

        old_path = sample_app / "a.txt"
        new_path = sample_app / "a_renamed.txt"
        old_path.rename(new_path)

        handler.on_moved(
            watchdog.events.FileMovedEvent(
                src_path=str(old_path),
                dest_path=str(new_path),
            )
        )

        assert str(old_path) not in wf_project_context.file_hashes
        assert wf_project_context.file_hashes.get(str(new_path)) == self.md5_of_bytes(b"hello")

    def test_process_file_missing_is_ignored(self, sample_app, wf_project_context):
        handler = self._make_handler(sample_app, wf_project_context)

        absent = sample_app / "missing.txt"
        handler._process_file(str(absent))

        assert str(absent) not in wf_project_context.file_hashes

0 commit comments

Comments
 (0)