import json
import os
import re
from datetime import datetime, timezone
from html import unescape
from urllib.request import urlopen

# Discourse category feed for the preCICE forum's "News" category,
# and the path the generated summary is written to.
DISCOURSE_URL = "https://precice.discourse.group/c/news/5.json"
OUTPUT_FILE = "./assets/data/news.json"


def fetch_json(url: str):
    """Fetch a URL and decode the response body as JSON."""
    with urlopen(url) as res:
        return json.loads(res.read().decode("utf-8"))

def strip_html(html: str) -> str:
    """Remove HTML tags and decode HTML entities, leaving plain text."""
    return unescape(re.sub(r"<[^>]*>", "", html))


def main():
    try:
        data = fetch_json(DISCOURSE_URL)
        topics = data.get("topic_list", {}).get("topics", [])

        news = []
        for topic in topics:
            # Fetch the full topic to get the first post's rendered ("cooked") HTML.
            detail = fetch_json(f"https://precice.discourse.group/t/{topic['id']}.json")
            cooked = detail.get("post_stream", {}).get("posts", [{}])[0].get("cooked", "")
            text = strip_html(cooked).strip()

            # Keep a ~30-word excerpt; append an ellipsis only if the text was truncated.
            words = text.split()
            excerpt = " ".join(words[:30]) + ("..." if len(words) > 30 else "")

            news.append({
                "id": topic["id"],
                "title": topic["title"],
                "slug": topic["slug"],
                "url": f"https://precice.discourse.group/t/{topic['slug']}/{topic['id']}",
                "last_posted_at": topic.get("last_posted_at"),
                "like_count": topic.get("like_count"),
                "posts_count": topic.get("posts_count"),
                "views": topic.get("views"),
                "description": excerpt,
            })

        # Ensure the output directory exists, then write the news list
        # together with a UTC generation timestamp.
        os.makedirs(os.path.dirname(OUTPUT_FILE), exist_ok=True)
        with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
            json.dump(
                {"generated_at": datetime.now(timezone.utc).isoformat(), "topics": news},
                f,
                indent=2,
            )

        print(f"News data saved to {OUTPUT_FILE}")

    except Exception as e:
        print("Error fetching news:", e)


if __name__ == "__main__":
    main()
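
# A minimal sketch of reading the generated file back, assuming the script
# above has already run and written OUTPUT_FILE:
#
#     import json
#
#     with open("./assets/data/news.json", encoding="utf-8") as f:
#         payload = json.load(f)
#
#     print(payload["generated_at"])
#     for item in payload["topics"]:
#         print(item["title"], "->", item["url"])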