Skip to content

Commit 1068517

Browse files
hydropixclaude
and committed
Sync from main + working changes
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 0d84b55 commit 1068517

File tree

13 files changed

+437
-97
lines changed

13 files changed

+437
-97
lines changed

src/api/translation_state.py

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,18 +4,28 @@
44
import threading
55
import time
66
import copy
7+
import uuid
78
from datetime import datetime
89
from typing import Dict, Any, Optional
910
from src.persistence.checkpoint_manager import CheckpointManager
1011

1112

13+
def generate_server_session_id() -> str:
14+
"""Generate a unique session ID for this server instance."""
15+
return str(uuid.uuid4())
16+
17+
1218
class TranslationStateManager:
1319
"""Thread-safe manager for translation state"""
1420

15-
def __init__(self, checkpoint_manager: Optional[CheckpointManager] = None):
21+
def __init__(self, checkpoint_manager: Optional[CheckpointManager] = None, server_session_id: Optional[str] = None):
1622
self._translations: Dict[str, Dict[str, Any]] = {}
1723
self._lock = threading.RLock() # Use RLock to allow nested locking
18-
self.checkpoint_manager = checkpoint_manager or CheckpointManager()
24+
# Generate a unique session ID for this server instance
25+
self.server_session_id = server_session_id or generate_server_session_id()
26+
self.checkpoint_manager = checkpoint_manager or CheckpointManager(
27+
server_session_id=self.server_session_id
28+
)
1929

2030
def create_translation(self, translation_id: str, config: Dict[str, Any]) -> None:
2131
"""Create a new translation entry"""
@@ -124,13 +134,20 @@ def get_translation_summaries(self) -> list:
124134
with self._lock:
125135
summaries = []
126136
for tid, data in self._translations.items():
137+
config = data.get('config', {})
138+
stats = data.get('stats', {})
127139
summaries.append({
128140
"translation_id": tid,
129141
"status": data.get('status'),
130142
"progress": data.get('progress'),
131-
"start_time": data.get('stats', {}).get('start_time'),
132-
"output_filename": data.get('config', {}).get('output_filename'),
133-
"file_type": data.get('config', {}).get('file_type', 'txt')
143+
"start_time": stats.get('start_time'),
144+
"output_filename": config.get('output_filename'),
145+
"input_filename": config.get('input_filename'),
146+
"file_type": config.get('file_type', 'txt'),
147+
# Include stats for UI restoration
148+
"total_chunks": stats.get('total_chunks', 0),
149+
"completed_chunks": stats.get('completed_chunks', 0),
150+
"last_translation": data.get('last_translation')
134151
})
135152
return sorted(summaries, key=lambda x: x.get('start_time', 0), reverse=True)
136153

src/api/websocket.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,14 @@ def emit_update(socketio, translation_id, data_to_emit, state_manager):
3838
if 'progress' not in data_to_emit and 'progress' in translation_data:
3939
data_to_emit['progress'] = translation_data['progress']
4040

41+
# Store last translation for UI restoration after browser refresh
42+
log_entry = data_to_emit.get('log_entry')
43+
if (log_entry and log_entry.get('type') == 'llm_response' and
44+
log_entry.get('data', {}).get('response')):
45+
state_manager.set_translation_field(
46+
translation_id, 'last_translation', log_entry['data']['response']
47+
)
48+
4149
socketio.emit('translation_update', data_to_emit, namespace='/')
4250
except Exception as e:
4351
print(f"WebSocket emission error for {translation_id}: {e}")

src/core/adapters/generic_translator.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ async def translate(
5757
llm_provider: str,
5858
progress_callback: Optional[Callable] = None,
5959
log_callback: Optional[Callable] = None,
60+
stats_callback: Optional[Callable] = None,
6061
check_interruption_callback: Optional[Callable] = None,
6162
bilingual_output: bool = False,
6263
**llm_kwargs
@@ -71,6 +72,7 @@ async def translate(
7172
llm_provider: LLM provider name (ollama, gemini, openai, openrouter)
7273
progress_callback: Optional callback for progress updates (receives percentage)
7374
log_callback: Optional callback for logging (receives type and message)
75+
stats_callback: Optional callback for statistics updates (receives dict with total_chunks, completed_chunks, failed_chunks)
7476
check_interruption_callback: Optional callback to check if translation should be interrupted
7577
bilingual_output: If True, output will contain both original and translated text
7678
**llm_kwargs: Additional LLM configuration (endpoint, api_key, etc.)
@@ -100,6 +102,14 @@ async def translate(
100102
if log_callback:
101103
log_callback("units_found", f"Found {total_units} translation units")
102104

105+
# Send initial stats with total_chunks
106+
if stats_callback:
107+
stats_callback({
108+
'total_chunks': total_units,
109+
'completed_chunks': 0,
110+
'failed_chunks': 0
111+
})
112+
103113
# 3. Check for checkpoint and resume
104114
resume_from = 0
105115
checkpoint_data = self.checkpoint_manager.load_checkpoint(self.translation_id)
@@ -109,6 +119,13 @@ async def translate(
109119
if log_callback:
110120
log_callback("checkpoint_resumed",
111121
f"Resuming from unit {resume_from}/{total_units}")
122+
# Update stats with resumed progress
123+
if stats_callback:
124+
stats_callback({
125+
'total_chunks': total_units,
126+
'completed_chunks': resume_from,
127+
'failed_chunks': 0
128+
})
112129
else:
113130
# 4. Create new translation job
114131
self.checkpoint_manager.start_job(
@@ -216,6 +233,14 @@ async def translate(
216233
completed_chunks=i + 1
217234
)
218235

236+
# Update stats
237+
if stats_callback:
238+
stats_callback({
239+
'total_chunks': total_units,
240+
'completed_chunks': i + 1,
241+
'failed_chunks': failed_count
242+
})
243+
219244
# Update context for next unit
220245
last_context = (
221246
translated_content[-200:]
@@ -246,6 +271,14 @@ async def translate(
246271
failed_chunks=1
247272
)
248273

274+
# Update stats with failure
275+
if stats_callback:
276+
stats_callback({
277+
'total_chunks': total_units,
278+
'completed_chunks': i,
279+
'failed_chunks': failed_count
280+
})
281+
249282
except Exception as e:
250283
if log_callback:
251284
log_callback("unit_error",
@@ -263,6 +296,14 @@ async def translate(
263296
failed_chunks=1
264297
)
265298

299+
# Update stats with failure
300+
if stats_callback:
301+
stats_callback({
302+
'total_chunks': total_units,
303+
'completed_chunks': i,
304+
'failed_chunks': failed_count
305+
})
306+
266307
# 7. Reconstruct output file
267308
if log_callback:
268309
log_callback("reconstruct_start", "Reconstructing output file")

src/core/adapters/translate_file.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -180,10 +180,6 @@ async def translate_file(
180180
'gemini_api_key': gemini_api_key,
181181
'openai_api_key': openai_api_key,
182182
'openrouter_api_key': openrouter_api_key,
183-
'progress_callback': progress_callback,
184-
'log_callback': log_callback,
185-
'stats_callback': stats_callback,
186-
'check_interruption_callback': check_interruption_callback,
187183
'prompt_options': prompt_options,
188184
}
189185

@@ -193,6 +189,10 @@ async def translate_file(
193189
target_language=target_language,
194190
model_name=model_name,
195191
llm_provider=llm_provider,
192+
progress_callback=progress_callback,
193+
log_callback=log_callback,
194+
stats_callback=stats_callback,
195+
check_interruption_callback=check_interruption_callback,
196196
bilingual_output=bilingual_output,
197197
**llm_config
198198
)

src/persistence/checkpoint_manager.py

Lines changed: 147 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,16 +15,18 @@ class CheckpointManager:
1515
and file storage for uploaded files.
1616
"""
1717

18-
def __init__(self, db_path: str = "data/jobs.db"):
18+
def __init__(self, db_path: str = "data/jobs.db", server_session_id: Optional[str] = None):
1919
"""
2020
Initialize checkpoint manager.
2121
2222
Args:
2323
db_path: Path to SQLite database
24+
server_session_id: Unique identifier for the current server session
2425
"""
2526
self.db = Database(db_path)
2627
self.uploads_dir = Path("data/uploads")
2728
self.uploads_dir.mkdir(parents=True, exist_ok=True)
29+
self.server_session_id = server_session_id
2830

2931
def start_job(
3032
self,
@@ -49,8 +51,10 @@ def start_job(
4951
if input_file_path:
5052
self._preserve_input_file(translation_id, input_file_path, config)
5153

52-
# Create job in database with updated config
53-
success = self.db.create_job(translation_id, file_type, config)
54+
# Create job in database with updated config and server session ID
55+
success = self.db.create_job(
56+
translation_id, file_type, config, self.server_session_id
57+
)
5458

5559
return success
5660

@@ -263,6 +267,146 @@ def get_resumable_jobs(self) -> List[Dict[str, Any]]:
263267

264268
return jobs
265269

270+
def reset_running_jobs_on_startup(self) -> int:
271+
"""
272+
Reset jobs with 'running' status from previous server sessions to 'interrupted'.
273+
274+
Only resets jobs that have a different server_session_id, preserving
275+
jobs that are actually running in the current session. This prevents
276+
browser refreshes from interrupting active translations.
277+
278+
This should be called on server startup to handle jobs that were
279+
interrupted by a server crash or restart. These jobs will then
280+
appear in the resumable jobs list.
281+
282+
Returns:
283+
Number of jobs reset
284+
"""
285+
if not self.server_session_id:
286+
# Fallback: if no session ID, don't reset anything to be safe
287+
return 0
288+
return self.db.reset_running_jobs(self.server_session_id)
289+
290+
def cleanup_old_jobs(self, max_age_days: int = 30) -> Tuple[int, int]:
291+
"""
292+
Clean up old jobs and their associated files.
293+
294+
This removes jobs older than max_age_days and cleans up their
295+
upload directories to prevent database and disk bloat.
296+
297+
Args:
298+
max_age_days: Maximum age in days for jobs to keep (default 30)
299+
300+
Returns:
301+
Tuple of (jobs_deleted, files_cleaned)
302+
"""
303+
# Get list of old job IDs before deletion (for file cleanup)
304+
old_jobs = []
305+
try:
306+
from datetime import datetime, timedelta
307+
cutoff = datetime.now() - timedelta(days=max_age_days)
308+
309+
# Get jobs that will be deleted
310+
all_jobs = self.db.get_resumable_jobs(max_age_days=9999) # Get all
311+
for job in all_jobs:
312+
created_str = job.get('created_at', '')
313+
if created_str:
314+
try:
315+
created = datetime.fromisoformat(created_str.replace('Z', '+00:00'))
316+
if created.replace(tzinfo=None) < cutoff:
317+
old_jobs.append(job['translation_id'])
318+
except (ValueError, TypeError):
319+
pass
320+
except Exception as e:
321+
print(f"Warning: Error getting old job list: {e}")
322+
323+
# Delete from database
324+
jobs_deleted = self.db.cleanup_old_jobs(max_age_days)
325+
326+
# Clean up upload directories for deleted jobs
327+
files_cleaned = 0
328+
for job_id in old_jobs:
329+
job_upload_dir = self.uploads_dir / job_id
330+
if job_upload_dir.exists():
331+
try:
332+
shutil.rmtree(job_upload_dir)
333+
files_cleaned += 1
334+
except Exception as e:
335+
print(f"Warning: Could not delete upload directory for {job_id}: {e}")
336+
337+
return jobs_deleted, files_cleaned
338+
339+
def cleanup_orphan_uploads(self) -> int:
340+
"""
341+
Clean up upload files/directories that don't have corresponding jobs in the database.
342+
343+
These are "orphan" items left behind from previous incomplete cleanups.
344+
Handles:
345+
- trans_xxx folders (job ID folders)
346+
- hash_filename files (legacy upload files)
347+
348+
Returns:
349+
Number of orphan items deleted
350+
"""
351+
orphans_deleted = 0
352+
353+
if not self.uploads_dir.exists():
354+
return 0
355+
356+
# Get all job IDs and preserved file paths from database
357+
try:
358+
import sqlite3
359+
import json
360+
conn = sqlite3.connect(self.db.db_path)
361+
conn.row_factory = sqlite3.Row
362+
cursor = conn.cursor()
363+
cursor.execute("SELECT translation_id, config FROM translation_jobs")
364+
db_job_ids = set()
365+
preserved_files = set() # Full file paths that are referenced
366+
for row in cursor.fetchall():
367+
db_job_ids.add(row['translation_id'])
368+
config = json.loads(row['config'])
369+
preserved_path = config.get('preserved_input_path', '')
370+
if preserved_path:
371+
# Store the filename to check against orphan files
372+
preserved_files.add(Path(preserved_path).name)
373+
conn.close()
374+
except Exception as e:
375+
print(f"Warning: Error getting job IDs: {e}")
376+
return 0
377+
378+
# Check each item in uploads directory
379+
for item in self.uploads_dir.iterdir():
380+
item_name = item.name
381+
382+
# Skip test folders
383+
if item_name.startswith('test_'):
384+
continue
385+
386+
is_orphan = True
387+
388+
if item.is_dir():
389+
# It's a folder - check if it's a job ID folder
390+
if item_name.startswith('trans_'):
391+
if item_name in db_job_ids:
392+
is_orphan = False
393+
else:
394+
# It's a file - check if it's referenced by any job
395+
if item_name in preserved_files:
396+
is_orphan = False
397+
398+
if is_orphan:
399+
try:
400+
if item.is_dir():
401+
shutil.rmtree(item)
402+
else:
403+
item.unlink()
404+
orphans_deleted += 1
405+
except Exception as e:
406+
print(f"Warning: Could not delete orphan {item_name}: {e}")
407+
408+
return orphans_deleted
409+
266410
def mark_paused(self, translation_id: str) -> bool:
267411
"""
268412
Mark a job as paused (user-initiated stop).

0 commit comments

Comments (0)