1+ #!/usr/bin/env python
2+ # -*- coding: utf-8 -*-
3+ """
4+ 生成大厂实践文章HTML页面
5+ """
6+
import argparse
import html
import json
import os
import shutil
import sys
import time
from datetime import datetime

from jinja2 import Template
15+
# Absolute directory that holds this script; every other path derives from it.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# Put the script's own directory on the import path.
sys.path.append(BASE_DIR)

# Configuration values, defined inline rather than loaded from a config module.
DATA_DIR = os.path.join(BASE_DIR, "data")
OUTPUT_DIR = os.path.join(BASE_DIR, "output")
STATIC_DIR = os.path.join(BASE_DIR, "output", "static")
ARTICLE_JSON_FILE = os.path.join(BASE_DIR, "data", "article.json")
MAX_TITLE_DISPLAY_LENGTH = 100
26+
# Helper that guarantees the working directories exist
def ensure_directories():
    """Create the data, output and static directories if they are missing.

    Directories are checked in that order; each one that does not exist is
    created and reported on stdout.
    """
    for path in (DATA_DIR, OUTPUT_DIR, STATIC_DIR):
        if os.path.exists(path):
            continue
        os.makedirs(path)
        print(f"创建目录: {path} ")
34+
def render_template(template_content, context):
    """Render a Jinja2 template string.

    Args:
        template_content: Template source text.
        context: Dictionary of variables exposed to the template.

    Returns:
        str: The rendered output.
    """
    return Template(template_content).render(**context)
47+
48+
def _coerce_text(value, default):
    """Return ``value`` as a string, or ``default`` when it is None."""
    if value is None:
        return default
    return value if isinstance(value, str) else str(value)


def _coerce_tags(value):
    """Return ``value`` as a list of tag strings.

    A list/tuple keeps its non-None elements (stringified), a non-empty
    string becomes a single tag, and anything else yields an empty list.
    """
    if isinstance(value, (list, tuple)):
        return [str(tag) for tag in value if tag is not None]
    if isinstance(value, str) and value:
        return [value]
    return []


def load_article_data(file_path):
    """Load article records from a JSON file and normalise them.

    The file is expected to contain a JSON array of objects keyed with the
    Chinese field names ``内容``, ``链接``, ``公司``, ``标签`` and ``时间``.
    Each object is converted to a dict with the English keys ``title``,
    ``link``, ``company``, ``tags`` and ``date``. Missing or ``null`` fields
    get the same defaults a missing key gets, so callers always receive
    strings (and a list of strings for ``tags``).

    Args:
        file_path: Path of the JSON file.

    Returns:
        list: Normalised article dicts; empty when the file cannot be read,
        is not valid JSON, or does not hold a JSON array.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"加载文章数据失败: {e} ")
        print("请确保article.json文件存在且格式正确")
        return []

    if not isinstance(data, list):
        print(f"加载文章数据失败: JSON顶层应为数组,实际为 {type(data).__name__} ")
        print("请确保article.json文件存在且格式正确")
        return []

    converted_data = []
    for item in data:
        if not isinstance(item, dict):
            # One malformed entry should not discard every other article.
            print(f"跳过无效的文章条目: {item!r} ")
            continue
        converted_data.append({
            'title': _coerce_text(item.get('内容'), '无标题'),
            'link': _coerce_text(item.get('链接'), '#'),
            'company': _coerce_text(item.get('公司'), '未知'),
            'tags': _coerce_tags(item.get('标签')),
            'date': _coerce_text(item.get('时间'), ''),
        })
    return converted_data
84+
85+
def get_sortable_date(date_str):
    """Convert a date string into a sortable ``YYYY-MM-DD`` string.

    Recognised inputs (month and day may be unpadded; a trailing time of day
    separated by whitespace is ignored):

    * ``YYYY-MM-DD``, ``YYYY.MM.DD``, ``YYYY/MM/DD``
    * ``MM-DD-YYYY``, ``MM.DD.YYYY``, ``MM/DD/YYYY``
    * ``MM-DD-YY`` (the year is taken to be 20YY)
    * ``YYYY年MM月DD日`` (``日`` optional; a missing day becomes ``00``)

    Args:
        date_str: Date string to normalise.

    Returns:
        str: The date as zero-padded ``YYYY-MM-DD``. Today's date is returned
        when the input is not a string or matches none of the formats.
    """
    today = datetime.now().strftime('%Y-%m-%d')
    if not isinstance(date_str, str):
        return today
    text = date_str.strip()
    if not text:
        return today

    chinese = '年' in text
    if chinese:
        # Keep what precedes the day marker, then unify year/month markers.
        text = text.split('日')[0].replace('年', '-').replace('月', '-')
    else:
        # Drop a trailing time of day, then unify "." and "/" separators.
        text = text.split()[0].replace('.', '-').replace('/', '-')

    parts = [part.strip() for part in text.split('-')]
    if len(parts) != 3:
        return today
    first, second, third = parts
    if chinese and third == '':
        third = '0'  # "YYYY年MM月" has no day component
    if not (first.isdecimal() and second.isdecimal() and third.isdecimal()):
        return today

    if chinese or len(first) == 4:
        year, month, day = first, second, third
    elif len(third) in (2, 4):
        # Month-first order, matching the MM/DD/YYYY and MM-DD-YY conventions.
        month, day, year = first, second, third
    else:
        return today

    if len(year) == 2:
        year = '20' + year
    if len(year) != 4 or len(month) > 2 or len(day) > 2:
        return today
    return f"{int(year):04d}-{int(month):02d}-{int(day):02d}"
123+
124+
def generate_table_rows(items):
    """Build the ``<tr>`` markup for the article table.

    Every value taken from an article is HTML-escaped before it is placed in
    element text or an attribute, so quotes, ``<`` or ``&`` in the data cannot
    break the markup or inject script. ``None`` values fall back to the same
    defaults as missing keys.

    Args:
        items: Iterable of article dicts with the keys ``title``, ``link``,
            ``company``, ``tags`` and ``date``.

    Returns:
        str: The concatenated table rows; an empty string for no items.
    """
    def text_of(value, default):
        """Return value as a string, or default when it is None."""
        return default if value is None else str(value)

    def esc(value):
        """Escape text for use in HTML content or a double-quoted attribute."""
        return html.escape(value, quote=True)

    rows = []
    for idx, item in enumerate(items, 1):  # 1-based row number
        title = text_of(item.get('title'), '无标题')
        # Truncate before escaping so an entity is never cut in half.
        if len(title) > MAX_TITLE_DISPLAY_LENGTH:
            display_title = title[:MAX_TITLE_DISPLAY_LENGTH] + '...'
        else:
            display_title = title

        link = esc(text_of(item.get('link'), '#'))
        company = esc(text_of(item.get('company'), '未知'))
        date = esc(text_of(item.get('date'), ''))

        raw_tags = item.get('tags') or []
        if isinstance(raw_tags, str):
            raw_tags = [raw_tags]  # a bare string is one tag, not characters
        tag_texts = [str(tag) for tag in raw_tags]
        tag_display = esc(', '.join(tag_texts))
        tag_attr = esc(','.join(tag_texts))

        # Column order matches the header: index, company, title, tags, date.
        rows.append(f"""
<tr class="article-row" data-company="{company}" data-tags="{tag_attr}">
<td class="px-4 py-3 whitespace-nowrap text-sm text-gray-500">
{idx}
</td>
<td class="px-4 py-3 whitespace-nowrap">
<span class="inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium bg-blue-100 text-blue-800">
{company}
</span>
</td>
<td class="px-4 py-3 whitespace-nowrap">
<a href="{link}" target="_blank" rel="noopener noreferrer" title="{esc(title)}" class="text-blue-600 hover:text-blue-800 hover:underline transition-colors">
{esc(display_title)}
</a>
</td>
<td class="px-4 py-3 whitespace-nowrap text-sm text-gray-500">
{tag_display}
</td>
<td class="px-4 py-3 whitespace-nowrap text-sm text-gray-500">
{date}
</td>
</tr>
""")

    return ''.join(rows)
180+
181+
def create_static_templates():
    """Ensure the static asset directory exists and report completion."""
    # Only the directory is guaranteed here; the asset files themselves are
    # created or maintained by other means.
    static_root = STATIC_DIR
    os.makedirs(static_root, exist_ok=True)

    print("静态资源文件管理完成")
189+
190+
def generate_industry_html():
    """Generate the article index page (``index.html``) in the output folder.

    Steps: ensure the directories exist, load the articles, sort them newest
    first, collect the distinct companies and tags, build the table rows,
    render ``output/static/templates/index.html.template`` and write the
    result to ``output/index.html``. Progress is reported on stdout.

    Returns:
        str: Path of the generated HTML file, or None when there are no
        articles or any step fails (the error and traceback are printed).
    """
    try:
        # Make sure the directories exist
        ensure_directories()

        # Prepare the static asset directory
        create_static_templates()

        # Load the article data
        print("加载文章数据...")
        items = load_article_data(ARTICLE_JSON_FILE)
        print(f"成功加载 {len(items)} 条文章数据")

        if not items:
            print("没有找到文章数据,无法生成HTML页面")
            return None

        # Sort by date, newest first
        print("按日期排序文章...")
        items_sorted = sorted(
            items,
            key=lambda article: get_sortable_date(article.get('date', '')),
            reverse=True,
        )

        # Collect the distinct companies and tags for the filter controls
        print("提取公司和标签列表...")
        companies = sorted({item.get('company', '未知') for item in items})
        # "or []" keeps an article whose tags value is None from aborting the run.
        tags = sorted({tag for item in items for tag in (item.get('tags') or [])})
        print(f"共提取 {len(companies)} 家公司,{len(tags)} 个标签")

        # Build the table rows
        print("生成表格行HTML...")
        table_rows = generate_table_rows(items_sorted)
        print("表格行生成完成")

        # Assemble the template context
        print("准备渲染上下文...")
        context = {
            'TIMESTAMP': int(time.time()),
            'LAST_UPDATED': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            'total_items': len(items),
            'companies': companies,
            'tags': tags,
            'table_rows': table_rows,
            'articles_json': json.dumps(items_sorted, ensure_ascii=False, default=str),
        }
        print(f"上下文准备完成,键数量: {len(context)} ")

        # Read the HTML template from output/static/templates
        template_file = os.path.join(OUTPUT_DIR, "static", "templates", "index.html.template")
        with open(template_file, 'r', encoding='utf-8') as f:
            html_template = f.read()
        print(f"成功读取模板文件: {template_file} ")

        # Render the page
        print("开始渲染HTML内容...")
        html_content = render_template(html_template, context)
        print("HTML渲染成功")

        # The page is written directly as index.html
        output_file = os.path.join(OUTPUT_DIR, "index.html")

        print(f"写入HTML文件: {output_file} ")
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(html_content)

        print(f"HTML页面已成功生成: {output_file} ")
        return output_file
    except Exception as e:
        # Top-level boundary: report the failure and let the caller see None.
        print(f"生成HTML页面失败: {e} ")
        import traceback
        print(f"完整错误栈:\n {traceback.format_exc()} ")
        return None
267+
268+
269+
def main(argv=None):
    """Command-line entry point.

    Args:
        argv: Optional list of command-line arguments; defaults to
            ``sys.argv[1:]`` when None. No options are defined, but parsing
            still provides ``--help`` and rejects unknown arguments.

    Returns:
        int: 0 when the HTML page was generated, 1 otherwise.
    """
    parser = argparse.ArgumentParser(description='生成大厂实践文章HTML页面')
    parser.parse_args(argv)

    # Generate the page (written directly as index.html)
    html_file = generate_industry_html()
    return 0 if html_file else 1


if __name__ == "__main__":
    # Propagate failure to the shell so schedulers and CI can detect it.
    sys.exit(main())
0 commit comments