11"""Service for syncing files between filesystem and database."""
22
3+ import asyncio
34import os
45import time
6+ from concurrent .futures import ThreadPoolExecutor
57from dataclasses import dataclass , field
68from datetime import datetime
79from pathlib import Path
@@ -80,6 +82,41 @@ def __init__(
         self.relation_repository = relation_repository
         self.search_service = search_service
         self.file_service = file_service
+        self._thread_pool = ThreadPoolExecutor(max_workers=app_config.sync_thread_pool_size)
+
+    async def _read_file_async(self, file_path: Path) -> str:
+        """Read file content in a thread pool to avoid blocking the event loop."""
+        loop = asyncio.get_event_loop()
+        return await loop.run_in_executor(self._thread_pool, file_path.read_text, "utf-8")
+
+    async def _compute_checksum_async(self, path: str) -> str:
+        """Compute a file checksum in a thread pool to avoid blocking the event loop."""
+
+        def _sync_compute_checksum(path_str: str) -> str:
+            # Synchronous version for thread pool execution
+            path_obj = self.file_service.base_path / path_str
+
+            if self.file_service.is_markdown(path_str):
+                content = path_obj.read_text(encoding="utf-8")
+            else:
+                content = path_obj.read_bytes()
+
+            # Use a synchronous equivalent of compute_checksum
+            import hashlib
+
+            if isinstance(content, str):
+                content_bytes = content.encode("utf-8")
+            else:
+                content_bytes = content
+            return hashlib.sha256(content_bytes).hexdigest()
+
+        loop = asyncio.get_event_loop()
+        return await loop.run_in_executor(self._thread_pool, _sync_compute_checksum, path)
+
+    def __del__(self):
+        """Clean up the thread pool when the service is destroyed."""
+        if hasattr(self, "_thread_pool"):
+            self._thread_pool.shutdown(wait=False)

     async def sync(self, directory: Path, project_name: Optional[str] = None) -> SyncReport:
         """Sync all files with database."""
@@ -289,7 +326,7 @@ async def sync_markdown_file(self, path: str, new: bool = True) -> Tuple[Optiona
         logger.debug(f"Parsing markdown file, path: {path}, new: {new}")

         file_path = self.entity_parser.base_path / path
-        file_content = file_path.read_text(encoding="utf-8")
+        file_content = await self._read_file_async(file_path)
         file_contains_frontmatter = has_frontmatter(file_content)

         # entity markdown will always contain front matter, so it can be used to create/update the entity
@@ -326,7 +363,7 @@ async def sync_markdown_file(self, path: str, new: bool = True) -> Tuple[Optiona
         # After updating relations, we need to compute the checksum again
         # This is necessary for files with wikilinks to ensure consistent checksums
         # after relation processing is complete
-        final_checksum = await self.file_service.compute_checksum(path)
+        final_checksum = await self._compute_checksum_async(path)

         # set checksum
         await self.entity_repository.update(entity.id, {"checksum": final_checksum})
@@ -350,7 +387,7 @@ async def sync_regular_file(self, path: str, new: bool = True) -> Tuple[Optional
         Returns:
             Tuple of (entity, checksum)
         """
-        checksum = await self.file_service.compute_checksum(path)
+        checksum = await self._compute_checksum_async(path)
         if new:
             # Generate permalink from path
             await self.entity_service.resolve_permalink(path)
@@ -620,7 +657,7 @@ async def scan_directory(self, directory: Path) -> ScanResult:

                 path = Path(root) / filename
                 rel_path = path.relative_to(directory).as_posix()
-                checksum = await self.file_service.compute_checksum(rel_path)
+                checksum = await self._compute_checksum_async(rel_path)
                 result.files[rel_path] = checksum
                 result.checksums[checksum] = rel_path

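The payoff of `_compute_checksum_async` shows up when many files are hashed at once: the hashes run in worker threads while the loop keeps scheduling other coroutines. A rough sketch of that effect, using a standalone SHA-256 helper and hypothetical paths rather than the service's own methods:

```python
import asyncio
import hashlib
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path

_pool = ThreadPoolExecutor(max_workers=4)

def _sha256_of(path: Path) -> str:
    # Plain synchronous hash, suitable for a worker thread.
    return hashlib.sha256(path.read_bytes()).hexdigest()

async def checksum(path: Path) -> str:
    loop = asyncio.get_event_loop()
    return await loop.run_in_executor(_pool, _sha256_of, path)

async def main() -> None:
    paths = [Path(p) for p in ("a.md", "b.md", "c.md")]  # hypothetical files
    # gather lets the three hashes proceed concurrently in the pool.
    for p, digest in zip(paths, await asyncio.gather(*(checksum(p) for p in paths))):
        print(p, digest)

if __name__ == "__main__":
    asyncio.run(main())
```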