Review fixes: async LLM rewrite, larger max_tokens with truncation warning, prompt renumbering, "Mrs." expansion

razumau · razumau · commit e98b950bc9f1 · 2026-03-01T23:26:42.000+01:00
diff --git a/bot.py b/bot.py
@@ -81,13 +81,13 @@ async def set_preprocess(update: Update, context: ContextTypes.DEFAULT_TYPE):
     await update.message.reply_text(f"Preprocessing set to: {mode} — {PREPROCESS_MODES[mode]}")
 
 
-def apply_preprocessing(content: str, mode: str) -> str:
+async def apply_preprocessing(content: str, mode: str) -> str:
     if mode == "none":
         return content
     elif mode == "regex":
         return preprocess_for_tts(content)
     elif mode == "llm":
-        return rewrite_for_audio(preprocess_for_tts(content))
+        return await rewrite_for_audio(preprocess_for_tts(content))
     return content
 
 
@@ -122,7 +122,7 @@ async def handle_message(update: Update, context: ContextTypes.DEFAULT_TYPE):
         mp3_filename = title.replace(" ", "_").lower() + ".mp3"
 
         await update.message.reply_text(f"Extracted content, preprocessing ({preprocess_mode})...")
-        content = apply_preprocessing(content, preprocess_mode)
+        content = await apply_preprocessing(content, preprocess_mode)
 
         await update.message.reply_text("Producing audio...")
         metadata = text_to_mp3(text=content, output_mp3=mp3_filename, model_name=model_name, speed=1.0)
diff --git a/llm_preprocess.py b/llm_preprocess.py
@@ -8,36 +8,39 @@
 REWRITE_PROMPT = """Update the following article for audio narration. Follow these rules strictly:
 
 1. Remove all URLs, email addresses, and hyperlinks entirely.
-2. Remove code blocks. If a code block is central to the article's point, briefly describe what it does in one sentence.
+2. Remove code blocks. If a code block is central to the article’s point, briefly describe what it does in one sentence.
 3. Convert tables to short prose descriptions.
 4. Remove all citation markers like [1], [2], etc.
 5. Remove references to figures, images, charts, or any visual elements (e.g. "see Figure 3", "as shown below").
 6. Expand abbreviations: "e.g." → "for example", "i.e." → "that is", "etc." → "et cetera".
 7. Write out numbers as words when appropriate. This includes years.
 8. Remove all markdown formatting (headers, bold, italic, links).
 9. Keep the content faithful to the original — do not add or rewrite anything that isn’t covered by the rules above.
-12. Output ONLY the rewritten text, nothing else.
+10. Output ONLY the rewritten text, nothing else.
 
 Article text:
 
 {text}"""
 
 
-def rewrite_for_audio(text: str) -> str:
+async def rewrite_for_audio(text: str) -> str:
     """Use Claude to rewrite article text for audio narration."""
     api_key = os.getenv("ANTHROPIC_API_KEY")
     if not api_key:
         raise ValueError("ANTHROPIC_API_KEY environment variable is required for LLM preprocessing")
 
-    client = anthropic.Anthropic(api_key=api_key)
+    client = anthropic.AsyncAnthropic(api_key=api_key)
 
-    message = client.messages.create(
+    message = await client.messages.create(
         model="claude-haiku-4-5-20251001",
-        max_tokens=8192,
+        max_tokens=65536,
         system=SYSTEM_PROMPT,
         messages=[
             {"role": "user", "content": REWRITE_PROMPT.format(text=text)},
         ],
     )
 
+    if message.stop_reason == "max_tokens":
+        print("Warning: LLM preprocessing output was truncated due to max_tokens limit")
+
     return message.content[0].text
diff --git a/preprocess.py b/preprocess.py
@@ -20,7 +20,7 @@
 TITLE_ABBREVIATIONS = {
     "Dr.": "Doctor",
     "Mr.": "Mister",
-    "Mrs.": "Misses",
+    "Mrs.": "Missus",
     "Ms.": "Ms",
     "Prof.": "Professor",
     "Sr.": "Senior",