Skip to content

Commit e98b950

Browse files
committed
minor review fixes
1 parent ef6a73d commit e98b950

File tree

3 files changed

+13
-10
lines changed

3 files changed

+13
-10
lines changed

bot.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -81,13 +81,13 @@ async def set_preprocess(update: Update, context: ContextTypes.DEFAULT_TYPE):
8181
await update.message.reply_text(f"Preprocessing set to: {mode}{PREPROCESS_MODES[mode]}")
8282

8383

84-
def apply_preprocessing(content: str, mode: str) -> str:
84+
async def apply_preprocessing(content: str, mode: str) -> str:
8585
if mode == "none":
8686
return content
8787
elif mode == "regex":
8888
return preprocess_for_tts(content)
8989
elif mode == "llm":
90-
return rewrite_for_audio(preprocess_for_tts(content))
90+
return await rewrite_for_audio(preprocess_for_tts(content))
9191
return content
9292

9393

@@ -122,7 +122,7 @@ async def handle_message(update: Update, context: ContextTypes.DEFAULT_TYPE):
122122
mp3_filename = title.replace(" ", "_").lower() + ".mp3"
123123

124124
await update.message.reply_text(f"Extracted content, preprocessing ({preprocess_mode})...")
125-
content = apply_preprocessing(content, preprocess_mode)
125+
content = await apply_preprocessing(content, preprocess_mode)
126126

127127
await update.message.reply_text("Producing audio...")
128128
metadata = text_to_mp3(text=content, output_mp3=mp3_filename, model_name=model_name, speed=1.0)

llm_preprocess.py

Lines changed: 9 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -8,36 +8,39 @@
88
REWRITE_PROMPT = """Update the following article for audio narration. Follow these rules strictly:
99
1010
1. Remove all URLs, email addresses, and hyperlinks entirely.
11-
2. Remove code blocks. If a code block is central to the article's point, briefly describe what it does in one sentence.
11+
2. Remove code blocks. If a code block is central to the articles point, briefly describe what it does in one sentence.
1212
3. Convert tables to short prose descriptions.
1313
4. Remove all citation markers like [1], [2], etc.
1414
5. Remove references to figures, images, charts, or any visual elements (e.g. "see Figure 3", "as shown below").
1515
6. Expand abbreviations: "e.g." → "for example", "i.e." → "that is", "etc." → "et cetera".
1616
7. Write out numbers as words when appropriate. This includes years.
1717
8. Remove all markdown formatting (headers, bold, italic, links).
1818
9. Keep the content faithful to the original — do not add or rewrite anything that isn’t covered by the rules above.
19-
12. Output ONLY the rewritten text, nothing else.
19+
10. Output ONLY the rewritten text, nothing else.
2020
2121
Article text:
2222
2323
{text}"""
2424

2525

26-
def rewrite_for_audio(text: str) -> str:
26+
async def rewrite_for_audio(text: str) -> str:
2727
"""Use Claude to rewrite article text for audio narration."""
2828
api_key = os.getenv("ANTHROPIC_API_KEY")
2929
if not api_key:
3030
raise ValueError("ANTHROPIC_API_KEY environment variable is required for LLM preprocessing")
3131

32-
client = anthropic.Anthropic(api_key=api_key)
32+
client = anthropic.AsyncAnthropic(api_key=api_key)
3333

34-
message = client.messages.create(
34+
message = await client.messages.create(
3535
model="claude-haiku-4-5-20251001",
36-
max_tokens=8192,
36+
max_tokens=65536,
3737
system=SYSTEM_PROMPT,
3838
messages=[
3939
{"role": "user", "content": REWRITE_PROMPT.format(text=text)},
4040
],
4141
)
4242

43+
if message.stop_reason == "max_tokens":
44+
print("Warning: LLM preprocessing output was truncated due to max_tokens limit")
45+
4346
return message.content[0].text

preprocess.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@
2020
TITLE_ABBREVIATIONS = {
2121
"Dr.": "Doctor",
2222
"Mr.": "Mister",
23-
"Mrs.": "Misses",
23+
"Mrs.": "Missus",
2424
"Ms.": "Ms",
2525
"Prof.": "Professor",
2626
"Sr.": "Senior",

0 commit comments

Comments
 (0)