|
# Prompt template sent as the user message when preparing an article for
# narration. It is filled in with ``str.format(text=...)``, so ``{text}`` must
# remain the only brace-delimited field; any literal braces added to the rules
# later have to be doubled (``{{`` / ``}}``).
REWRITE_PROMPT = """Update the following article for audio narration. Follow these rules strictly:

1. Remove all URLs, email addresses, and hyperlinks entirely.
2. Remove code blocks. If a code block is central to the article’s point, briefly describe what it does in one sentence.
3. Convert tables to short prose descriptions.
4. Remove all citation markers like [1], [2], etc.
5. Remove references to figures, images, charts, or any visual elements (e.g. "see Figure 3", "as shown below").
6. Expand abbreviations: "e.g." → "for example", "i.e." → "that is", "etc." → "et cetera".
7. Write out numbers as words when appropriate. This includes years.
8. Remove all markdown formatting (headers, bold, italic, links).
9. Keep the content faithful to the original — do not add or rewrite anything that isn’t covered by the rules above.
10. Output ONLY the rewritten text, nothing else.

Article text:

{text}"""
24 | 24 |
|
25 | 25 |
|
async def rewrite_for_audio(text: str) -> str:
    """Use Claude to rewrite article text for audio narration.

    The article is substituted into ``REWRITE_PROMPT`` and sent, together with
    ``SYSTEM_PROMPT``, to the Anthropic Messages API.

    Args:
        text: The article text to rewrite.

    Returns:
        The concatenated text blocks of the model's reply. If the reply was
        cut off by the token limit, a warning is printed and the truncated
        text is still returned.

    Raises:
        ValueError: If the ``ANTHROPIC_API_KEY`` environment variable is unset
            or empty.
    """
    api_key = os.getenv("ANTHROPIC_API_KEY")
    if not api_key:
        raise ValueError("ANTHROPIC_API_KEY environment variable is required for LLM preprocessing")

    # Use the client as a context manager so its HTTP connection pool is
    # closed when the call finishes rather than left for garbage collection.
    async with anthropic.AsyncAnthropic(api_key=api_key) as client:
        # Stream the response: with a large max_tokens the SDK refuses
        # non-streaming requests (they could exceed its ten-minute limit).
        # NOTE(review): 64000 is believed to be the output cap for this model;
        # confirm against the current Anthropic model documentation.
        async with client.messages.stream(
            model="claude-haiku-4-5-20251001",
            max_tokens=64000,
            system=SYSTEM_PROMPT,
            messages=[
                {"role": "user", "content": REWRITE_PROMPT.format(text=text)},
            ],
        ) as stream:
            message = await stream.get_final_message()

    if message.stop_reason == "max_tokens":
        print("Warning: LLM preprocessing output was truncated due to max_tokens limit")

    # Join every text block instead of indexing content[0], which raises
    # IndexError on an empty reply and assumes the first block is text.
    return "".join(block.text for block in message.content if block.type == "text")
0 commit comments