Skip to content

Commit 1935044

Browse files
committed
feat(writer): load previous blog posts and provide Chinese copywriting guidelines
1 parent f682f51 commit 1935044

File tree

2 files changed

+85
-55
lines changed

2 files changed

+85
-55
lines changed

.github/workflows/auto-writer.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ jobs:
1313
uses: actions/checkout@v4
1414
- name: Install Python libraries
1515
run: |
16-
pip install openai bs4 requests
16+
pip install openai bs4 requests pyyaml
1717
- name: Article creation script
1818
env:
1919
DS_APIKEY: ${{ secrets.DS_APIKEY }}

writer.py

Lines changed: 84 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -5,65 +5,95 @@
55
import time
66
import datetime
77
import os
8+
import glob
9+
import yaml
810

911
# Each day's article lives in its own dated directory under the blog content tree.
path_to = f'src/content/blog/{datetime.datetime.now().strftime("%Y-%m-%d")}'

# Treat the day as done only when the article file itself exists. The directory
# is created before any generation happens, so checking the directory alone
# would block every retry after a run that failed part-way through.
if os.path.exists(f"{path_to}/index.md"):
    print("Article already generated today.")
    raise SystemExit(0)
else:
    os.makedirs(path_to, exist_ok=True)
    print(f"Created directory {path_to}")
1719

1820
# Build the DeepSeek client through the OpenAI-compatible SDK and report how
# long construction took. The API key is read from the DS_APIKEY environment
# variable.
start = time.time()
print("Connecting to LLM API ...")
deepseek = OpenAI(
    api_key=os.getenv("DS_APIKEY"),
    base_url="https://api.deepseek.com",
)
elapsed = time.time() - start
print(f"Initialized LLM API. ({elapsed:.1f}s)")
2224

2325
def generate(context, provider, model):
    """Send a chat conversation to an LLM and return the reply text.

    Args:
        context: List of chat messages passed as ``messages`` to the provider.
        provider: Client object exposing ``chat.completions.create``.
        model: Model name forwarded to the provider.

    Returns:
        The first choice's message content with surrounding whitespace removed.

    Raises:
        RuntimeError: If the response has no choices, or the first choice
            carries no text content.
    """
    completion = provider.chat.completions.create(
        model=model,
        messages=context,
    )
    choices = completion.choices
    # Fail with an explicit message instead of an opaque IndexError /
    # AttributeError when the provider returns nothing usable.
    if not choices or choices[0].message.content is None:
        raise RuntimeError(f"Model {model!r} returned no text content.")
    return choices[0].message.content.strip()
2931

3032
def scrape_website(url, css_selector, timeout=30):
    """Fetch a page and return the elements matching a CSS selector.

    Args:
        url: Address of the page to download.
        css_selector: Selector passed to BeautifulSoup's ``select``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely and stall the job.

    Returns:
        The list of matching elements, or an empty list when the response
        status is not 200.

    Network errors, including timeouts, are not caught here and propagate to
    the caller.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return []
    soup = BeautifulSoup(response.content, "html.parser")
    return soup.select(css_selector)
39+
40+
def get_existing_blog_posts(pattern="src/content/blog/*/index.md"):
    """Collect the title and description of every existing blog post.

    Each file matching ``pattern`` is read as UTF-8. Files whose content
    starts with ``---`` are treated as having YAML frontmatter, delimited by
    the first two ``---`` occurrences; other files are skipped silently.

    Args:
        pattern: Glob pattern locating the post files. Defaults to the blog
            content tree used by this script.

    Returns:
        A list of ``{"title": ..., "description": ...}`` dicts, ordered by
        file path. Missing or null fields become empty strings. Files that
        cannot be read or parsed are reported on stdout and skipped.
    """
    blog_posts = []
    # Sort so the resulting prompt text does not depend on directory order.
    for path in sorted(glob.glob(pattern)):
        try:
            with open(path, "r", encoding="utf-8") as f:
                content = f.read()
        except (OSError, UnicodeDecodeError) as e:
            print(f"Error reading {path}: {e}")
            continue

        if not content.startswith("---"):
            continue

        parts = content.split("---", 2)
        if len(parts) < 3:
            print(f"Error reading {path}: frontmatter is not terminated")
            continue

        try:
            metadata = yaml.safe_load(parts[1])
        except yaml.YAMLError as e:
            print(f"Error reading {path}: {e}")
            continue

        # Empty frontmatter parses to None and a bare scalar to str; neither
        # has the fields we need.
        if not isinstance(metadata, dict):
            print(f"Error reading {path}: frontmatter is not a mapping")
            continue

        blog_posts.append({
            "title": metadata.get("title") or "",
            "description": metadata.get("description") or "",
        })

    return blog_posts
61+
62+
# Load the metadata of the posts already published and flatten it into one
# text block (a title line and a description line per post) for use in prompts.
existing_posts = get_existing_blog_posts()
post_summaries = (
    f"标题: {post['title']}\n描述: {post['description']}"
    for post in existing_posts
)
existing_posts_text = "\n".join(post_summaries)
print(f"Loaded {len(existing_posts)} existing blog posts.")
3766

3867
# Collect the text of every element matching ".titleline" on the Hacker News
# front page.
topics = [topic.get_text(strip=True) for topic in scrape_website("https://news.ycombinator.com/", ".titleline")]
print(f"Scraped {len(topics)} topics from Hacker News.")
if not topics:
    # Nothing to base a topic on (scrape_website returns [] on a non-200 reply).
    raise SystemExit("No topics scraped from Hacker News; aborting.")
# Hand the model a random subset of the headlines. random.sample draws without
# replacement, so no headline is repeated, and the lower bound is clamped so
# fewer than five scraped topics no longer makes randint raise.
topics_text = "\n".join(random.sample(topics, k=random.randint(min(5, len(topics)), len(topics))))
4170

4271
def extract_topic(topics):
    """Ask the chat model for one blog topic based on the given headlines.

    The prompt contains ``topics`` and the module-level
    ``existing_posts_text``, and asks the model to avoid topics similar to
    the existing posts.

    Args:
        topics: Text listing the headlines to draw inspiration from.

    Returns:
        The model's reply as returned by ``generate``.
    """
    system_prompt = "你在为一篇技术博客确定一个主题。直接用中文输出主题。"
    user_prompt = f"阅读以下是HackerNews的热门文章,然后写一个可以用于技术博客的主题。这个主题应当是一个通用、普通的技术,不能是一个事件或其它东西。\n\n{topics}\n\n以下是已有的博客文章,请避免选择相似的主题:\n\n{existing_posts_text}\n\n只需要一个主题,直接输出。"
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    return generate(messages, deepseek, "deepseek-chat")
4877

4978
def outline(topic):
    """Ask the reasoning model for a detailed article outline on ``topic``.

    Args:
        topic: Subject of the blog article.

    Returns:
        The model's reply as returned by ``generate``.
    """
    request = {
        "role": "user",
        "content": f"我要写一篇关于「{topic}」的博客文章。帮我列一个详细的文章提纲。",
    }
    return generate([request], deepseek, "deepseek-reasoner")
5483

5584
def write_from_outline(outline):
    """Ask the reasoning model to write the article body from an outline.

    The system message sets Chinese typesetting rules. The user message
    carries the outline, the module-level ``existing_posts_text`` (so the
    model avoids repeating earlier posts), and the Markdown/LaTeX output
    requirements.

    Args:
        outline: Outline text the article should follow.

    Returns:
        The model's reply as returned by ``generate``.
    """
    style_rules = "你是一位专业技术博客作者。在写作时请遵循以下中文排版规范:1) 中文与英文、数字之间需要有空格;2) 中文标点与英文、数字之间不加空格;3) 使用全角中文标点;4) 专有名词大小写正确;5) 英文、数字使用半角字符;6) 使用直角引号「」。"
    instructions = f"{outline}\n\n根据这个提纲中关于技术知识的部分,写出一篇技术博客文章。文章中避免出现图片,避免使用列表。每一段出现的代码都进行较为详细的解读。在讲述内容时尽量使用段落的语言,语言风格可以略偏专业,但保持清晰。\n\n以下是已有的博客文章,请确保你的内容与它们不重复:\n\n{existing_posts_text}\n\n使用Markdown(要求符合Common Markdown规范)输出,使用LaTeX公式(注意:数学的开闭定界符前后不能有字母或数字字符。像x$a + b = c$或$a + b = c$1将无法渲染为数学公式(所有$会被渲染为$);但x $\\infty$ 1和($\\infty$)会正常渲染),标题尽量只用一级标题 `#` 和二级标题 `##`,不要用分割线。直接输出正文。"
    messages = [
        {"role": "system", "content": style_rules},
        {"role": "user", "content": instructions},
    ]
    return generate(messages, deepseek, "deepseek-reasoner")
6090

6191
def summary(article):
    """Ask the chat model for a short (15-character) blurb for ``article``.

    Args:
        article: Full article text to summarise.

    Returns:
        The model's reply as returned by ``generate``.
    """
    role_prompt = "你是一个技术博客简介写作者,简介不一定需要涵盖文章的全部内容,能起到一定的提示作用即可。直接输出简介。请遵循中文排版规范,确保中英文之间有空格,使用正确的标点符号。"
    request = f"给这篇文章写一个15字的简短介绍:\n\n{article}"
    messages = [
        {"role": "system", "content": role_prompt},
        {"role": "user", "content": request},
    ]
    return generate(messages, deepseek, "deepseek-chat")
6797

6898
start = time.time()
6999
print("Generating topic ...")
@@ -90,30 +120,30 @@ def summary(article):
90120
# Imported here because it is needed only for quoting frontmatter values below.
import json

# Attribute the article to one of the listed authors at random.
author = random.choice(["杨其臻", "杨子凡", "叶家炜", "黄京"])

# The first level-one heading supplies the title; the frontmatter is emitted
# once, when that heading is found.
for line in lines:
    if line.startswith("# "):
        title = line[1:].strip()
        print(f"Detected title: {title}")

        # json.dumps produces a double-quoted scalar with quotes, backslashes
        # and newlines escaped, which is also a valid YAML double-quoted
        # string. Plain interpolation produced invalid frontmatter whenever
        # the model's title or summary contained any of those characters.
        metadata = "\n".join([
            "---",
            f"title: {json.dumps(title, ensure_ascii=False)}",
            f"author: {json.dumps(author, ensure_ascii=False)}",
            f'date: "{datetime.datetime.now().strftime("%b %d, %Y")}"',
            f"description: {json.dumps(summary_result, ensure_ascii=False)}",
            "latex: true",
            "pdf: true",
            "---",
        ]) + "\n"
        print(f"Injecting metadata:\n{metadata.strip()}")

        markdown_file += metadata
        break
111141

112142
# Append the article body, leaving out every line that begins with "---".
markdown_file += "".join(
    f"{line}\n" for line in lines if not line.startswith("---")
)

# Write the assembled document into the dated post directory.
article_path = f"{path_to}/index.md"
with open(article_path, "w", encoding="utf-8") as f:
    f.write(markdown_file)

print(f"Markdown file generated at {article_path}")

0 commit comments

Comments
 (0)