Skip to content

Commit dbd1400

Browse files
authored
Merge pull request #2 from razumau/claude/review-app-improvements-Eso8v
Add text preprocessing and dialogue generation for TTS
2 parents 08a31e4 + 7cf8db1 commit dbd1400

File tree

14 files changed

+1218
-782
lines changed

14 files changed

+1218
-782
lines changed

.env.example

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,6 @@ ALLOWED_TELEGRAM_USERNAMES=username,another_username
2424
ALLOWED_TELEGRAM_IDS=123456,1235678
2525

2626
ELEVENLABS_API_KEY=
27+
28+
# Required for LLM preprocessing modes
29+
ANTHROPIC_API_KEY=

bot.py

Lines changed: 54 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,20 @@
1414
from dotenv import load_dotenv
1515

1616
from extract_article import extract_webpage_content
17+
from preprocess import preprocess_for_tts
18+
from llm_preprocess import rewrite_for_audio
1719
from podcast import add_episode
1820
from tts import text_to_mp3, MODELS
1921

2022
load_dotenv()
2123

24+
PREPROCESS_MODES = {
25+
"none": "No preprocessing (raw text)",
26+
"regex": "Regex-based cleaning (remove URLs, code, citations, expand numbers)",
27+
"llm": "LLM rewrite for natural audio narration",
28+
}
29+
DEFAULT_PREPROCESS = "regex"
30+
2231

2332
async def start(update: Update, _context: ContextTypes.DEFAULT_TYPE):
    # /start handler: simple liveness reply.
    # NOTE(review): unlike the other handlers, no is_allowed() check here — confirm intentional.
    await update.message.reply_text("Hello!")
@@ -56,6 +65,32 @@ async def set_model(update: Update, context: ContextTypes.DEFAULT_TYPE):
5665
await update.message.reply_text(f"Model set to {model}")
5766

5867

68+
async def set_preprocess(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Handle /setpreprocess <mode>: store the user's TTS preprocessing mode.

    Requires exactly one argument naming a key of PREPROCESS_MODES; otherwise
    replies with a usage message listing the available modes. The chosen mode
    is persisted in per-user storage and read back by handle_message.
    """
    user = update.message.from_user
    if not is_allowed(user):
        print(f"User {user} is not allowed")
        return

    if len(context.args) != 1 or context.args[0] not in PREPROCESS_MODES:
        modes_list = "\n".join(f" {k}: {v}" for k, v in PREPROCESS_MODES.items())
        await update.message.reply_text(f"Usage: /setpreprocess <mode>\nAvailable modes:\n{modes_list}")
        return

    mode = context.args[0]
    context.user_data["preprocess"] = mode
    # Bug fix: mode and its description were concatenated with no separator,
    # producing e.g. "Preprocessing set to: regexRegex-based cleaning (...)".
    await update.message.reply_text(f"Preprocessing set to {mode}: {PREPROCESS_MODES[mode]}")
82+
83+
84+
async def apply_preprocessing(content: str, mode: str) -> str:
85+
if mode == "none":
86+
return content
87+
elif mode == "regex":
88+
return preprocess_for_tts(content)
89+
elif mode == "llm":
90+
return await rewrite_for_audio(preprocess_for_tts(content))
91+
return content
92+
93+
5994
async def handle_message(update: Update, context: ContextTypes.DEFAULT_TYPE):
6095
user = update.message.from_user
6196
if not is_allowed(user):
@@ -64,6 +99,7 @@ async def handle_message(update: Update, context: ContextTypes.DEFAULT_TYPE):
6499

65100
default_model_name = next(iter(MODELS.keys()))
66101
model_name = context.user_data.get("model", default_model_name)
102+
preprocess_mode = context.user_data.get("preprocess", DEFAULT_PREPROCESS)
67103

68104
url_pattern = r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
69105
urls = re.findall(url_pattern, update.message.text)
@@ -77,15 +113,28 @@ async def handle_message(update: Update, context: ContextTypes.DEFAULT_TYPE):
77113

78114
for url in urls:
79115
start_time = time.time()
80-
title, content = extract_webpage_content(url)
116+
result = extract_webpage_content(url)
117+
if result is None:
118+
await update.message.reply_text(f"Failed to extract content from {url}")
119+
continue
120+
121+
title, content = result
81122
mp3_filename = title.replace(" ", "_").lower() + ".mp3"
82-
await update.message.reply_text("Extracted content, producing audio")
123+
124+
await update.message.reply_text(f"Extracted content, preprocessing ({preprocess_mode})...")
125+
content = await apply_preprocessing(content, preprocess_mode)
126+
127+
await update.message.reply_text("Producing audio...")
83128
metadata = text_to_mp3(text=content, output_mp3=mp3_filename, model_name=model_name, speed=1.0)
84129
await update.message.reply_text("Produced audio, updating feed")
85-
description = f"Model: {metadata.model}. Voice: {metadata.voice}. {content[:150]}"
130+
description = (
131+
f"Model: {metadata.model}. Voice: {metadata.voice}. Preprocess: {preprocess_mode}. {content[:150]}"
132+
)
86133
add_episode(mp3_filename, title, description=description)
87134
end_time = time.time()
88-
await update.message.reply_text(f"Added “{title}” to the feed. This took {end_time - start_time:.2f} seconds")
135+
await update.message.reply_text(
136+
f"Added \u201c{title}\u201d to the feed. This took {end_time - start_time:.2f} seconds"
137+
)
89138

90139
if len(urls) > 1:
91140
await update.message.reply_text(f"Processed {len(urls)} URLs")
@@ -96,6 +145,7 @@ def main():
96145

97146
application.add_handler(CommandHandler("start", start))
98147
application.add_handler(CommandHandler("setmodel", set_model))
148+
application.add_handler(CommandHandler("setpreprocess", set_preprocess))
99149
application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_message))
100150

101151
print("Bot is running...")

extract_article.py

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,40 @@
11
import subprocess
22
import os
3+
import tempfile
34

45
BUN_SCRIPT = "extract_article.ts"
5-
ARTICLE_TITLE_FILE = "extracted_article_title.txt"
6-
ARTICLE_FILE = "extracted_article.txt"
76

87

9-
def extract_webpage_content(url: str) -> tuple[str, str] or None:
8+
def extract_webpage_content(url: str) -> tuple[str, str] | None:
9+
# Use unique temp files to avoid concurrency issues
10+
with (
11+
tempfile.NamedTemporaryFile(mode="w", suffix="_article.txt", delete=False) as article_f,
12+
tempfile.NamedTemporaryFile(mode="w", suffix="_title.txt", delete=False) as title_f,
13+
):
14+
article_path = article_f.name
15+
title_path = title_f.name
16+
1017
try:
11-
subprocess.run(["bun", BUN_SCRIPT, url], check=True)
18+
subprocess.run(
19+
["bun", BUN_SCRIPT, url, article_path, title_path],
20+
check=True,
21+
)
1222

13-
if not (os.path.exists(ARTICLE_FILE) and os.path.exists(ARTICLE_TITLE_FILE)):
14-
print(f"We expect input files at {ARTICLE_FILE} and {ARTICLE_TITLE_FILE}")
23+
if not (os.path.exists(article_path) and os.path.exists(title_path)):
24+
print(f"Expected output files at {article_path} and {title_path}")
1525
return None
1626

17-
with open(ARTICLE_TITLE_FILE, "r", encoding="utf-8") as f:
27+
with open(title_path, "r", encoding="utf-8") as f:
1828
title = f.read()
19-
os.remove(ARTICLE_TITLE_FILE)
2029

21-
with open(ARTICLE_FILE, "r", encoding="utf-8") as f:
30+
with open(article_path, "r", encoding="utf-8") as f:
2231
contents = f.read()
23-
os.remove(ARTICLE_FILE)
2432

2533
return title, contents
2634
except subprocess.CalledProcessError as e:
2735
print(f"Error running script: {e}")
2836
return None
37+
finally:
38+
for path in (article_path, title_path):
39+
if os.path.exists(path):
40+
os.remove(path)

extract_article.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@ const { JSDOM } = require('jsdom');
33
const fs = require('fs');
44

55
const url = process.argv[2];
6-
const ARTICLE_FILE = 'extracted_article.txt';
7-
const ARTICLE_TITLE_FILE = 'extracted_article_title.txt';
6+
const ARTICLE_FILE = process.argv[3] || 'extracted_article.txt';
7+
const ARTICLE_TITLE_FILE = process.argv[4] || 'extracted_article_title.txt';
88

99
if (!url) {
1010
console.error('Provide a URL or local file path as an argument');

html_fetcher.py

Lines changed: 0 additions & 74 deletions
This file was deleted.

llm_preprocess.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
import os
2+
3+
import anthropic
4+
5+
SYSTEM_PROMPT = """You are a professional audio producer who adapts written articles for podcast narration.
6+
Your job is to slightly adjust the given article text so it sounds better when read aloud by a text-to-speech system."""
7+
8+
REWRITE_PROMPT = """Update the following article for audio narration. Follow these rules strictly:
9+
10+
1. Remove all URLs, email addresses, and hyperlinks entirely.
11+
2. Remove code blocks. If a code block is central to the article’s point, briefly describe what it does in one sentence.
12+
3. Convert tables to short prose descriptions.
13+
4. Remove all citation markers like [1], [2], etc.
14+
5. Remove references to figures, images, charts, or any visual elements (e.g. "see Figure 3", "as shown below").
15+
6. Expand abbreviations: "e.g." → "for example", "i.e." → "that is", "etc." → "et cetera".
16+
7. Write out numbers as words when appropriate. This includes years.
17+
8. Remove all markdown formatting (headers, bold, italic, links).
18+
9. Keep the content faithful to the original — do not add or rewrite anything that isn’t covered by the rules above.
19+
10. Output ONLY the rewritten text, nothing else.
20+
21+
Article text:
22+
23+
{text}"""
24+
25+
26+
async def rewrite_for_audio(text: str) -> str:
27+
"""Use Claude to rewrite article text for audio narration."""
28+
api_key = os.getenv("ANTHROPIC_API_KEY")
29+
if not api_key:
30+
raise ValueError("ANTHROPIC_API_KEY environment variable is required for LLM preprocessing")
31+
32+
client = anthropic.AsyncAnthropic(api_key=api_key)
33+
34+
message = await client.messages.create(
35+
model="claude-haiku-4-5-20251001",
36+
max_tokens=65536,
37+
system=SYSTEM_PROMPT,
38+
messages=[
39+
{"role": "user", "content": REWRITE_PROMPT.format(text=text)},
40+
],
41+
)
42+
43+
if message.stop_reason == "max_tokens":
44+
print("Warning: LLM preprocessing output was truncated due to max_tokens limit")
45+
46+
return message.content[0].text

models/base.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ class TTSMetadata:
77
model: str
88
voice: str
99

10+
1011
class BaseTTS(ABC):
1112
@abstractmethod
1213
def __init__(

models/eleven.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,11 @@
88

99
GOOD_VOICES = ["Xb7hH8MSUJpSbSDYk0k2", "XB0fDUnXU5powFXDhCwa", "onwK4e9ZLuTAKqWW03F9", "ThT5KcBeYPX3keUQqHPh"]
1010

11+
AVAILABLE_MODELS = {
12+
"eleven": "eleven_flash_v2_5",
13+
"eleven_v3": "eleven_v3",
14+
}
15+
1116

1217
class ElevenLabsTTS(BaseTTS):
1318
def __init__(
@@ -17,20 +22,21 @@ def __init__(
1722
pick_random_voice: bool = False,
1823
voice: str = GOOD_VOICES[0],
1924
speed: float = 1.0,
25+
model_id: str = "eleven_flash_v2_5",
2026
):
2127
self.text = text
2228
self.output_filename = output_filename
2329
if pick_random_voice:
2430
self.voice = random.choice(GOOD_VOICES)
2531
else:
2632
self.voice = voice
27-
self.model_id = "eleven_flash_v2_5"
33+
self.model_id = model_id
2834
self.client = ElevenLabs(api_key=os.getenv("ELEVENLABS_API_KEY"))
2935

3036
def text_to_mp3(self) -> TTSMetadata:
3137
response = self.client.text_to_speech.convert(
3238
voice_id=self.voice,
33-
output_format="mp3_22050_32",
39+
output_format="mp3_44100_128",
3440
text=self.text,
3541
model_id=self.model_id,
3642
voice_settings=VoiceSettings(
@@ -46,4 +52,4 @@ def text_to_mp3(self) -> TTSMetadata:
4652
if chunk:
4753
f.write(chunk)
4854

49-
return TTSMetadata(model="eleven", voice=self.voice)
55+
return TTSMetadata(model=f"eleven ({self.model_id})", voice=self.voice)

0 commit comments

Comments
 (0)