Skip to content

Commit 17b33a3

Browse files
committed
chore: Remove legacy chunk size configuration and clean up debug code
- Removed MAIN_LINES_PER_CHUNK parameter (replaced by token-based chunking) - Changed language detection to auto-detect source from files and target from browser - Removed deprecated chunk_size from CLI args, API config, and TranslationConfig - Cleaned up 3 debug print statements in config_routes and translation_routes - Updated output filename format to use parentheses style (file (lang).ext) - Removed unused configuration validation code in CLI
1 parent b7ea2c5 commit 17b33a3

21 files changed

+284
-193
lines changed

.env.example

Lines changed: 11 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ OUTPUT_DIR=translated_files # Directory for translated output files
1212
LLM_PROVIDER=ollama
1313
# Your Google Gemini API key (required if using gemini provider)
1414
GEMINI_API_KEY=
15-
GEMINI_MODEL=gemini-3-flash
15+
GEMINI_MODEL=gemini-2.0-flash
1616
# Your OpenAI API key (required if using openai provider)
1717
OPENAI_API_KEY=
1818

@@ -23,9 +23,8 @@ OPENROUTER_MODEL=anthropic/claude-4.5-haiku
2323
# See all models at https://openrouter.ai/models (text-only models are auto-filtered in the UI)
2424

2525
# Translation Settings
26-
DEFAULT_SOURCE_LANGUAGE=English # Default source language (can be any language name)
27-
DEFAULT_TARGET_LANGUAGE=Chinese # Default target language (can be any language name)
28-
MAIN_LINES_PER_CHUNK=50 # Legacy parameter - kept for SRT grouping only (not used for text chunking)
26+
# Source language: Auto-detected from uploaded file (uses langdetect library)
27+
# Target language: Auto-detected from browser language (can be changed in UI)
2928
MAIN_CHUNK_SIZE=1000 # Maximum characters per chunk
3029
REQUEST_TIMEOUT=900 # API timeout in seconds
3130

@@ -51,19 +50,6 @@ AUTO_ADJUST_CONTEXT=true # Automatically adjust context/chunk size if prompt to
5150
# Advanced
5251
MAX_TRANSLATION_ATTEMPTS=3
5352

54-
# =============================================================================
55-
# EPUB Translation - Token Alignment Fallback (Phase 2)
56-
# =============================================================================
57-
# When the LLM fails to preserve HTML placeholders correctly during EPUB
58-
# translation, this fallback uses word-level alignment to reinsert them at
59-
# semantically correct positions. This dramatically improves translation
60-
# quality by ensuring 100% placeholder integrity.
61-
#
62-
# Multi-phase fallback system:
63-
# Phase 1: Normal translation with placeholders (retry logic)
64-
# Phase 2: Token alignment fallback (translate clean, then reinsert)
65-
# Phase 3: Untranslated fallback (preserve original if all else fails)
66-
6753
EPUB_TOKEN_ALIGNMENT_ENABLED=true
6854
# Options: true (enable Phase 2 fallback), false (use old behavior with only Phase 1 + Phase 3)
6955

@@ -76,18 +62,18 @@ EPUB_TOKEN_ALIGNMENT_METHOD=proportional
7662
SRT_LINES_PER_BLOCK=20
7763
SRT_MAX_CHARS_PER_BLOCK=2000
7864

79-
# Translation Signature
80-
# This adds a discreet attribution to your translations (metadata for EPUB, footer for TXT, comment for SRT)
81-
# Please consider keeping this enabled to support the project and help others discover this free tool!
82-
# The signature is non-intrusive and placed at the end of files. Thank you for your support!
83-
SIGNATURE_ENABLED=true
84-
8565
# Debug Mode
8666
# Enable verbose logging for troubleshooting configuration and connection issues.
8767
# Set to 'true' to see detailed logs about .env loading, API calls, and configuration values.
8868
# Useful when models are not detected or languages are not applied correctly.
8969
DEBUG_MODE=false
9070

71+
# Translation Signature
72+
# This adds a discreet attribution to your translations (metadata for EPUB, footer for TXT, comment for SRT)
73+
# Please consider keeping this enabled to support the project and help others discover this free tool!
74+
# The signature is non-intrusive and placed at the end of files. Thank you for your support!
75+
SIGNATURE_ENABLED=true
76+
9177
# TTS (Text-to-Speech) Configuration
9278
# Generate audio narration of translated documents
9379
# Requires ffmpeg installed on the system for Opus encoding
@@ -145,3 +131,5 @@ TTS_CFG_WEIGHT=0.5
145131
# cs (Czech), ar (Arabic), zh (Chinese), ja (Japanese), hu (Hungarian),
146132
# ko (Korean), hi (Hindi), vi (Vietnamese), sv (Swedish), da (Danish),
147133
# fi (Finnish), id (Indonesian), el (Greek)
134+
135+

DOCKER.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ Docker automatically pulls the correct architecture for your system.
102102
| `PORT` | Web server port | `5000` |
103103
| `OLLAMA_NUM_CTX` | Context window size | `2048` |
104104
| `REQUEST_TIMEOUT` | API timeout (seconds) | `900` |
105-
| `MAIN_LINES_PER_CHUNK` | Lines per translation chunk | `25` |
105+
| `MAX_TOKENS_PER_CHUNK` | Tokens per translation chunk | `400` |
106106
| `SIGNATURE_ENABLED` | Add signature to translations | `true` |
107107

108108
## Volume Mounts

README.md

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -64,32 +64,32 @@ See [docs/PROVIDERS.md](docs/PROVIDERS.md) for detailed setup instructions.
6464
## Command Line
6565

6666
```bash
67-
# Basic
68-
python translate.py -i book.epub -o book_zh.epub -sl English -tl Chinese
67+
# Basic (auto-generates "book (Chinese).epub")
68+
python translate.py -i book.epub -sl English -tl Chinese
6969

7070
# With OpenRouter
71-
python translate.py -i book.txt -o book_fr.txt --provider openrouter \
72-
--openrouter_api_key YOUR_KEY -m anthropic/claude-sonnet-4
71+
python translate.py -i book.txt --provider openrouter \
72+
--openrouter_api_key YOUR_KEY -m anthropic/claude-sonnet-4 -tl French
7373

7474
# With OpenAI
75-
python translate.py -i book.txt -o book_fr.txt --provider openai \
76-
--openai_api_key YOUR_KEY -m gpt-4o
75+
python translate.py -i book.txt --provider openai \
76+
--openai_api_key YOUR_KEY -m gpt-4o -tl French
7777

7878
# With Gemini
79-
python translate.py -i book.txt -o book_fr.txt --provider gemini \
80-
--gemini_api_key YOUR_KEY -m gemini-2.0-flash
79+
python translate.py -i book.txt --provider gemini \
80+
--gemini_api_key YOUR_KEY -m gemini-2.0-flash -tl French
8181

8282
# With local OpenAI-compatible server (llama.cpp, LM Studio, vLLM, etc.)
83-
python translate.py -i book.txt -o book_fr.txt --provider openai \
84-
--api_endpoint http://localhost:8080/v1/chat/completions -m your-model
83+
python translate.py -i book.txt --provider openai \
84+
--api_endpoint http://localhost:8080/v1/chat/completions -m your-model -tl French
8585
```
8686

8787
### Main Options
8888

8989
| Option | Description | Default |
9090
|--------|-------------|---------|
9191
| `-i, --input` | Input file | Required |
92-
| `-o, --output` | Output file | Auto |
92+
| `-o, --output` | Output file | Auto: `{name} ({lang}).{ext}` |
9393
| `-sl, --source_lang` | Source language | English |
9494
| `-tl, --target_lang` | Target language | Chinese |
9595
| `-m, --model` | Model name | mistral-small:24b |
@@ -117,8 +117,8 @@ OPENAI_API_KEY=sk-...
117117
GEMINI_API_KEY=...
118118

119119
# Performance
120-
MAIN_LINES_PER_CHUNK=25
121120
REQUEST_TIMEOUT=900
121+
MAX_TOKENS_PER_CHUNK=400 # Token-based chunking (default: 400 tokens)
122122
```
123123

124124
---

deployment/.env.docker.example

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,6 @@ DEFAULT_SOURCE_LANGUAGE=English
4747
DEFAULT_TARGET_LANGUAGE=Chinese
4848

4949
# Chunking configuration
50-
MAIN_LINES_PER_CHUNK=25
5150
MAIN_CHUNK_SIZE=1000
5251

5352
# API request timeout (in seconds)

deployment/docker-compose.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@ services:
3636
# Translation Settings
3737
- DEFAULT_SOURCE_LANGUAGE=${DEFAULT_SOURCE_LANGUAGE:-English}
3838
- DEFAULT_TARGET_LANGUAGE=${DEFAULT_TARGET_LANGUAGE:-Chinese}
39-
- MAIN_LINES_PER_CHUNK=${MAIN_LINES_PER_CHUNK:-25}
4039
- MAIN_CHUNK_SIZE=${MAIN_CHUNK_SIZE:-1000}
4140
- REQUEST_TIMEOUT=${REQUEST_TIMEOUT:-900}
4241

docs/CLI.md

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ python translate.py -i input_file -o output_file
2424

2525
| Option | Description | Default |
2626
|--------|-------------|---------|
27-
| `-o, --output` | Output file path | Auto-generated |
27+
| `-o, --output` | Output file path | Auto-generated as `{original} ({target_lang}).{ext}` |
2828

2929
### Languages
3030

@@ -70,14 +70,17 @@ python translate.py -i input_file -o output_file
7070
### Basic Translation
7171

7272
```bash
73-
# Text file
74-
python translate.py -i book.txt -o book_fr.txt -sl English -tl French
73+
# Text file (auto-generates "book (French).txt")
74+
python translate.py -i book.txt -sl English -tl French
7575

76-
# Subtitles
77-
python translate.py -i movie.srt -o movie_fr.srt -tl French
76+
# Subtitles (auto-generates "movie (French).srt")
77+
python translate.py -i movie.srt -tl French
7878

79-
# EPUB
80-
python translate.py -i novel.epub -o novel_fr.epub -tl French
79+
# EPUB (auto-generates "novel (French).epub")
80+
python translate.py -i novel.epub -tl French
81+
82+
# Custom output filename
83+
python translate.py -i book.txt -o my_custom_name.txt -tl French
8184
```
8285

8386
### With Different Providers
@@ -142,8 +145,8 @@ OPENAI_API_KEY=sk-...
142145
GEMINI_API_KEY=...
143146

144147
# Performance
145-
MAIN_LINES_PER_CHUNK=25
146148
REQUEST_TIMEOUT=900
149+
MAX_TOKENS_PER_CHUNK=400 # Token-based chunking (default: 400 tokens)
147150

148151
# Languages
149152
DEFAULT_SOURCE_LANGUAGE=English

docs/DOCKER_DEPLOYMENT.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -539,7 +539,7 @@ For larger chunks and better translations:
539539

540540
```env
541541
OLLAMA_NUM_CTX=16384
542-
MAIN_LINES_PER_CHUNK=50
542+
MAX_TOKENS_PER_CHUNK=800 # Larger token chunks for better context
543543
```
544544

545545
### Adjust Timeouts

docs/TROUBLESHOOTING.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ Solutions to common problems with TBL.
4343
**Cause**: Chunk is too large for model's context window.
4444

4545
**Solutions**:
46-
1. Reduce chunk size: `-cs 15` or `MAIN_LINES_PER_CHUNK=15`
46+
1. Reduce chunk size: `MAX_TOKENS_PER_CHUNK=200` (default: 400)
4747
2. Increase context window: `OLLAMA_NUM_CTX=8192`
4848

4949
---
@@ -56,7 +56,7 @@ Solutions to common problems with TBL.
5656

5757
**Solutions**:
5858
1. Increase timeout: `REQUEST_TIMEOUT=1800` (30 min)
59-
2. Reduce chunk size: `MAIN_LINES_PER_CHUNK=15`
59+
2. Reduce chunk size: `MAX_TOKENS_PER_CHUNK=200` (default: 400)
6060
3. Try a smaller model
6161
4. Try a cloud provider
6262

src/api/blueprints/config_routes.py

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ def get_config_path():
2222
from src.config import (
2323
API_ENDPOINT as DEFAULT_OLLAMA_API_ENDPOINT,
2424
DEFAULT_MODEL,
25-
MAIN_LINES_PER_CHUNK,
2625
REQUEST_TIMEOUT,
2726
OLLAMA_NUM_CTX,
2827
MAX_TRANSLATION_ATTEMPTS,
@@ -113,7 +112,6 @@ def mask_api_key(key):
113112
config_response = {
114113
"api_endpoint": DEFAULT_OLLAMA_API_ENDPOINT,
115114
"default_model": DEFAULT_MODEL,
116-
"chunk_size": MAIN_LINES_PER_CHUNK,
117115
"timeout": REQUEST_TIMEOUT,
118116
"context_window": OLLAMA_NUM_CTX,
119117
"max_attempts": MAX_TRANSLATION_ATTEMPTS,
@@ -124,15 +122,12 @@ def mask_api_key(key):
124122
"openrouter_api_key": mask_api_key(OPENROUTER_API_KEY),
125123
"gemini_api_key_configured": bool(GEMINI_API_KEY),
126124
"openai_api_key_configured": bool(OPENAI_API_KEY),
127-
"openrouter_api_key_configured": bool(OPENROUTER_API_KEY),
128-
"default_source_language": DEFAULT_SOURCE_LANGUAGE,
129-
"default_target_language": DEFAULT_TARGET_LANGUAGE
125+
"openrouter_api_key_configured": bool(OPENROUTER_API_KEY)
126+
# Languages are no longer sent from server - handled by browser detection
130127
}
131128

132129
if DEBUG_MODE:
133130
logger.debug(f"📤 /api/config response:")
134-
logger.debug(f" default_source_language: {DEFAULT_SOURCE_LANGUAGE}")
135-
logger.debug(f" default_target_language: {DEFAULT_TARGET_LANGUAGE}")
136131
logger.debug(f" api_endpoint: {DEFAULT_OLLAMA_API_ENDPOINT}")
137132
logger.debug(f" default_model: {DEFAULT_MODEL}")
138133

@@ -207,7 +202,6 @@ def _get_openrouter_models(provided_api_key=None):
207202
})
208203

209204
except Exception as e:
210-
print(f"❌ Error retrieving OpenRouter models: {e}")
211205
return jsonify({
212206
"models": [],
213207
"model_names": [],
@@ -353,7 +347,6 @@ def _get_gemini_models(provided_api_key=None):
353347
})
354348

355349
except Exception as e:
356-
print(f"❌ Error retrieving Gemini models: {e}")
357350
return jsonify({
358351
"models": [],
359352
"default": default_model,
@@ -558,9 +551,9 @@ def save_settings():
558551
'OPENROUTER_MODEL',
559552
'DEFAULT_MODEL',
560553
'LLM_PROVIDER',
561-
'DEFAULT_SOURCE_LANGUAGE',
562-
'DEFAULT_TARGET_LANGUAGE',
563554
'API_ENDPOINT'
555+
# DEFAULT_SOURCE_LANGUAGE and DEFAULT_TARGET_LANGUAGE removed
556+
# Languages are now auto-detected (source) and browser-detected (target)
564557
}
565558

566559
try:
@@ -607,9 +600,8 @@ def get_settings():
607600
"openrouter_api_key_configured": bool(OPENROUTER_API_KEY),
608601
"default_model": DEFAULT_MODEL or "",
609602
"llm_provider": os.getenv('LLM_PROVIDER', 'ollama'),
610-
"default_source_language": DEFAULT_SOURCE_LANGUAGE or "English",
611-
"default_target_language": DEFAULT_TARGET_LANGUAGE or "Chinese",
612603
"api_endpoint": DEFAULT_OLLAMA_API_ENDPOINT or ""
604+
# Languages are no longer stored in .env - auto-detected per session
613605
})
614606

615607
return bp

src/api/blueprints/translation_routes.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
from flask import Blueprint, request, jsonify
88

99
from src.config import (
10-
MAIN_LINES_PER_CHUNK,
1110
REQUEST_TIMEOUT,
1211
OLLAMA_NUM_CTX
1312
)
@@ -64,15 +63,11 @@ def start_translation_request():
6463
# Generate unique translation ID
6564
translation_id = f"trans_{int(time.time() * 1000)}"
6665

67-
# Debug: Log received prompt_options
68-
print(f"[DEBUG] Received prompt_options: {data.get('prompt_options', {})}")
69-
7066
# Build configuration
7167
config = {
7268
'source_language': data['source_language'],
7369
'target_language': data['target_language'],
7470
'model': data['model'],
75-
'chunk_size': int(data.get('chunk_size', MAIN_LINES_PER_CHUNK)),
7671
'llm_api_endpoint': data['llm_api_endpoint'],
7772
'request_timeout': int(data.get('timeout', REQUEST_TIMEOUT)),
7873
'context_window': int(data.get('context_window', OLLAMA_NUM_CTX)),

0 commit comments

Comments
 (0)