-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathextract_article.py
More file actions
40 lines (32 loc) · 1.22 KB
/
extract_article.py
File metadata and controls
40 lines (32 loc) · 1.22 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import subprocess
import os
import tempfile
BUN_SCRIPT = "extract_article.ts"
def extract_webpage_content(url: str) -> tuple[str, str] | None:
# Use unique temp files to avoid concurrency issues
with (
tempfile.NamedTemporaryFile(mode="w", suffix="_article.txt", delete=False) as article_f,
tempfile.NamedTemporaryFile(mode="w", suffix="_title.txt", delete=False) as title_f,
):
article_path = article_f.name
title_path = title_f.name
try:
subprocess.run(
["bun", BUN_SCRIPT, url, article_path, title_path],
check=True,
)
if not (os.path.exists(article_path) and os.path.exists(title_path)):
print(f"Expected output files at {article_path} and {title_path}")
return None
with open(title_path, "r", encoding="utf-8") as f:
title = f.read()
with open(article_path, "r", encoding="utf-8") as f:
contents = f.read()
return title, contents
except subprocess.CalledProcessError as e:
print(f"Error running script: {e}")
return None
finally:
for path in (article_path, title_path):
if os.path.exists(path):
os.remove(path)