Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,6 @@ ALLOWED_TELEGRAM_USERNAMES=username,another_username
ALLOWED_TELEGRAM_IDS=123456,1235678

ELEVENLABS_API_KEY=

# Required only for the "llm" preprocessing mode (/setpreprocess llm)
ANTHROPIC_API_KEY=
58 changes: 54 additions & 4 deletions bot.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,20 @@
from dotenv import load_dotenv

from extract_article import extract_webpage_content
from preprocess import preprocess_for_tts
from llm_preprocess import rewrite_for_audio
from podcast import add_episode
from tts import text_to_mp3, MODELS

load_dotenv()

# Preprocessing modes accepted by /setpreprocess, each mapped to the
# human-readable description shown in that command's usage/confirmation text.
PREPROCESS_MODES: dict[str, str] = {
    "none": "No preprocessing (raw text)",
    "regex": "Regex-based cleaning (remove URLs, code, citations, expand numbers)",
    "llm": "LLM rewrite for natural audio narration",
}

# Mode applied when a user has not chosen one with /setpreprocess.
DEFAULT_PREPROCESS: str = "regex"


async def start(update: Update, _context: ContextTypes.DEFAULT_TYPE):
    """Answer the /start command with a fixed greeting."""
    greeting = "Hello!"
    await update.message.reply_text(greeting)
Expand Down Expand Up @@ -56,6 +65,32 @@ async def set_model(update: Update, context: ContextTypes.DEFAULT_TYPE):
await update.message.reply_text(f"Model set to {model}")


async def set_preprocess(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Handle ``/setpreprocess <mode>`` by remembering the sender's mode.

    Senders rejected by ``is_allowed`` are reported on stdout and receive no
    reply. A call with exactly one argument that is a key of
    ``PREPROCESS_MODES`` stores that mode under
    ``context.user_data["preprocess"]`` and confirms it; any other call is
    answered with a usage message listing every available mode.
    """
    sender = update.message.from_user
    if not is_allowed(sender):
        print(f"User {sender} is not allowed")
        return

    args = context.args
    if len(args) == 1 and args[0] in PREPROCESS_MODES:
        chosen = args[0]
        context.user_data["preprocess"] = chosen
        await update.message.reply_text(f"Preprocessing set to: {chosen} — {PREPROCESS_MODES[chosen]}")
        return

    # Wrong argument count or unknown mode: show what is accepted.
    mode_lines = [f" {name}: {summary}" for name, summary in PREPROCESS_MODES.items()]
    listing = "\n".join(mode_lines)
    await update.message.reply_text(f"Usage: /setpreprocess <mode>\nAvailable modes:\n{listing}")


async def apply_preprocessing(content: str, mode: str) -> str:
    """Return ``content`` prepared for text-to-speech according to ``mode``.

    ``"regex"`` runs ``preprocess_for_tts``; ``"llm"`` runs the same regex
    pass and then awaits ``rewrite_for_audio`` on its result. ``"none"`` and
    any unrecognised mode return ``content`` untouched.
    """
    if mode not in ("regex", "llm"):
        return content

    # Both remaining modes start from the regex-cleaned text.
    cleaned = preprocess_for_tts(content)
    if mode == "llm":
        return await rewrite_for_audio(cleaned)
    return cleaned


async def handle_message(update: Update, context: ContextTypes.DEFAULT_TYPE):
user = update.message.from_user
if not is_allowed(user):
Expand All @@ -64,6 +99,7 @@ async def handle_message(update: Update, context: ContextTypes.DEFAULT_TYPE):

default_model_name = next(iter(MODELS.keys()))
model_name = context.user_data.get("model", default_model_name)
preprocess_mode = context.user_data.get("preprocess", DEFAULT_PREPROCESS)

url_pattern = r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
urls = re.findall(url_pattern, update.message.text)
Expand All @@ -77,15 +113,28 @@ async def handle_message(update: Update, context: ContextTypes.DEFAULT_TYPE):

for url in urls:
start_time = time.time()
title, content = extract_webpage_content(url)
result = extract_webpage_content(url)
if result is None:
await update.message.reply_text(f"Failed to extract content from {url}")
continue

title, content = result
mp3_filename = title.replace(" ", "_").lower() + ".mp3"
await update.message.reply_text("Extracted content, producing audio")

await update.message.reply_text(f"Extracted content, preprocessing ({preprocess_mode})...")
content = await apply_preprocessing(content, preprocess_mode)

await update.message.reply_text("Producing audio...")
metadata = text_to_mp3(text=content, output_mp3=mp3_filename, model_name=model_name, speed=1.0)
await update.message.reply_text("Produced audio, updating feed")
description = f"Model: {metadata.model}. Voice: {metadata.voice}. {content[:150]}"
description = (
f"Model: {metadata.model}. Voice: {metadata.voice}. Preprocess: {preprocess_mode}. {content[:150]}"
)
add_episode(mp3_filename, title, description=description)
end_time = time.time()
await update.message.reply_text(f"Added “{title}” to the feed. This took {end_time - start_time:.2f} seconds")
await update.message.reply_text(
f"Added \u201c{title}\u201d to the feed. This took {end_time - start_time:.2f} seconds"
)

if len(urls) > 1:
await update.message.reply_text(f"Processed {len(urls)} URLs")
Expand All @@ -96,6 +145,7 @@ def main():

application.add_handler(CommandHandler("start", start))
application.add_handler(CommandHandler("setmodel", set_model))
application.add_handler(CommandHandler("setpreprocess", set_preprocess))
application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_message))

print("Bot is running...")
Expand Down
32 changes: 22 additions & 10 deletions extract_article.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,40 @@
import subprocess
import os
import tempfile

BUN_SCRIPT = "extract_article.ts"
ARTICLE_TITLE_FILE = "extracted_article_title.txt"
ARTICLE_FILE = "extracted_article.txt"


def extract_webpage_content(url: str) -> tuple[str, str] or None:
def extract_webpage_content(url: str) -> tuple[str, str] | None:
# Use unique temp files to avoid concurrency issues
with (
tempfile.NamedTemporaryFile(mode="w", suffix="_article.txt", delete=False) as article_f,
tempfile.NamedTemporaryFile(mode="w", suffix="_title.txt", delete=False) as title_f,
):
article_path = article_f.name
title_path = title_f.name

try:
subprocess.run(["bun", BUN_SCRIPT, url], check=True)
subprocess.run(
["bun", BUN_SCRIPT, url, article_path, title_path],
check=True,
)

if not (os.path.exists(ARTICLE_FILE) and os.path.exists(ARTICLE_TITLE_FILE)):
print(f"We expect input files at {ARTICLE_FILE} and {ARTICLE_TITLE_FILE}")
if not (os.path.exists(article_path) and os.path.exists(title_path)):
print(f"Expected output files at {article_path} and {title_path}")
return None

with open(ARTICLE_TITLE_FILE, "r", encoding="utf-8") as f:
with open(title_path, "r", encoding="utf-8") as f:
title = f.read()
os.remove(ARTICLE_TITLE_FILE)

with open(ARTICLE_FILE, "r", encoding="utf-8") as f:
with open(article_path, "r", encoding="utf-8") as f:
contents = f.read()
os.remove(ARTICLE_FILE)

return title, contents
except subprocess.CalledProcessError as e:
print(f"Error running script: {e}")
return None
finally:
for path in (article_path, title_path):
if os.path.exists(path):
os.remove(path)
4 changes: 2 additions & 2 deletions extract_article.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ const { JSDOM } = require('jsdom');
const fs = require('fs');

const url = process.argv[2];
const ARTICLE_FILE = 'extracted_article.txt';
const ARTICLE_TITLE_FILE = 'extracted_article_title.txt';
const ARTICLE_FILE = process.argv[3] || 'extracted_article.txt';
const ARTICLE_TITLE_FILE = process.argv[4] || 'extracted_article_title.txt';

if (!url) {
console.error('Provide a URL or local file path as an argument');
Expand Down
74 changes: 0 additions & 74 deletions html_fetcher.py

This file was deleted.

46 changes: 46 additions & 0 deletions llm_preprocess.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import os

import anthropic

# System prompt sent with every rewrite request (passed as `system=` by
# rewrite_for_audio); it frames the model as an audio producer making light edits.
SYSTEM_PROMPT = """You are a professional audio producer who adapts written articles for podcast narration.
Your job is to slightly adjust the given article text so it sounds better when read aloud by a text-to-speech system."""

# User-message template for the rewrite request. rewrite_for_audio fills the
# single `{text}` placeholder via str.format, so any other literal braces added
# to this template would need to be doubled (`{{` / `}}`).
REWRITE_PROMPT = """Update the following article for audio narration. Follow these rules strictly:

1. Remove all URLs, email addresses, and hyperlinks entirely.
2. Remove code blocks. If a code block is central to the article’s point, briefly describe what it does in one sentence.
3. Convert tables to short prose descriptions.
4. Remove all citation markers like [1], [2], etc.
5. Remove references to figures, images, charts, or any visual elements (e.g. "see Figure 3", "as shown below").
6. Expand abbreviations: "e.g." → "for example", "i.e." → "that is", "etc." → "et cetera".
7. Write out numbers as words when appropriate. This includes years.
8. Remove all markdown formatting (headers, bold, italic, links).
9. Keep the content faithful to the original — do not add or rewrite anything that isn’t covered by the rules above.
10. Output ONLY the rewritten text, nothing else.

Article text:

{text}"""


async def rewrite_for_audio(text: str) -> str:
    """Use Claude to rewrite article text for audio narration.

    Args:
        text: Article text; substituted into ``REWRITE_PROMPT``.

    Returns:
        The text of the model's reply (all text blocks concatenated).

    Raises:
        ValueError: If ``ANTHROPIC_API_KEY`` is unset or empty.
        RuntimeError: If the reply contains no text block at all.
    """
    api_key = os.getenv("ANTHROPIC_API_KEY")
    if not api_key:
        raise ValueError("ANTHROPIC_API_KEY environment variable is required for LLM preprocessing")

    # A client is created per call, so close it (and its HTTP connection pool)
    # when done instead of leaving that to garbage collection.
    async with anthropic.AsyncAnthropic(api_key=api_key) as client:
        # Stream the response: the SDK's long-request guard refuses
        # non-streaming calls whose max_tokens implies a generation that could
        # exceed roughly ten minutes, which a budget this large does.
        # NOTE(review): 64000 is believed to be the documented output cap for
        # this model — confirm against the current models overview.
        async with client.messages.stream(
            model="claude-haiku-4-5-20251001",
            max_tokens=64000,
            system=SYSTEM_PROMPT,
            messages=[
                {"role": "user", "content": REWRITE_PROMPT.format(text=text)},
            ],
        ) as stream:
            message = await stream.get_final_message()

    if message.stop_reason == "max_tokens":
        print("Warning: LLM preprocessing output was truncated due to max_tokens limit")

    # Collect every text block rather than assuming exactly one at index 0.
    parts = [block.text for block in message.content if getattr(block, "type", None) == "text"]
    if not parts:
        raise RuntimeError("LLM preprocessing returned no text content")
    return "".join(parts)
1 change: 1 addition & 0 deletions models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ class TTSMetadata:
model: str
voice: str


class BaseTTS(ABC):
@abstractmethod
def __init__(
Expand Down
12 changes: 9 additions & 3 deletions models/eleven.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@

GOOD_VOICES = ["Xb7hH8MSUJpSbSDYk0k2", "XB0fDUnXU5powFXDhCwa", "onwK4e9ZLuTAKqWW03F9", "ThT5KcBeYPX3keUQqHPh"]

# Short model name -> ElevenLabs model id.
# NOTE(review): presumably the keys are the names exposed to callers; the
# consumer of this mapping is not visible here — confirm.
AVAILABLE_MODELS: dict[str, str] = {
    "eleven": "eleven_flash_v2_5",
    "eleven_v3": "eleven_v3",
}


class ElevenLabsTTS(BaseTTS):
def __init__(
Expand All @@ -17,20 +22,21 @@ def __init__(
pick_random_voice: bool = False,
voice: str = GOOD_VOICES[0],
speed: float = 1.0,
model_id: str = "eleven_flash_v2_5",
):
self.text = text
self.output_filename = output_filename
if pick_random_voice:
self.voice = random.choice(GOOD_VOICES)
else:
self.voice = voice
self.model_id = "eleven_flash_v2_5"
self.model_id = model_id
self.client = ElevenLabs(api_key=os.getenv("ELEVENLABS_API_KEY"))

def text_to_mp3(self) -> TTSMetadata:
response = self.client.text_to_speech.convert(
voice_id=self.voice,
output_format="mp3_22050_32",
output_format="mp3_44100_128",
text=self.text,
model_id=self.model_id,
voice_settings=VoiceSettings(
Expand All @@ -46,4 +52,4 @@ def text_to_mp3(self) -> TTSMetadata:
if chunk:
f.write(chunk)

return TTSMetadata(model="eleven", voice=self.voice)
return TTSMetadata(model=f"eleven ({self.model_id})", voice=self.voice)
Loading