@@ -58,69 +58,83 @@ def get_result_to_save(self, ques_count):
5858 ]
5959 logger .debug (f"处理序列: { seq } " )
6060
61- # 收集所有内容和脚注
61+ # 收集所有内容
6262 all_content = []
63- all_footnotes = []
64- footnote_counter = 1
65- footnote_mapping = {} # 用于存储原始编号到新编号的映射
66-
67- # 第一遍:收集所有引用并建立映射
68- logger .info ("开始第一遍处理:收集引用并建立映射" )
6963 for key in seq :
7064 if key not in self .res :
7165 logger .debug (f"跳过不存在的键: { key } " )
7266 continue
73-
7467 content = self .res [key ]["response_content" ]
75- footnotes = self . res [ key ][ "footnotes" ]
68+ all_content . append ( content )
7669
77- if footnotes :
78- logger .debug (f"处理 { key } 的脚注,数量: { len (footnotes )} " )
79- for num , content in footnotes : # 直接解构元组
80- if num not in footnote_mapping :
81- footnote_mapping [num ] = str (footnote_counter )
82- footnote_counter += 1
70+ # 合并所有内容
71+ full_content = "\n " .join (all_content )
8372
84- logger .info (f"脚注映射完成,共有 { len (footnote_mapping )} 个脚注" )
73+ # 提取所有脚注引用 [^1], [^2] 等
74+ footnote_refs = re .findall (r"\[\^(\d+)\]" , full_content )
8575
86- # 第二遍:更新内容和脚注
87- logger .info ("开始第二遍处理:更新内容和脚注" )
88- for key in seq :
89- if key not in self .res :
90- continue
76+ # 提取所有脚注定义 [^1]: 内容
77+ footnote_defs = re .findall (
78+ r"\[\^(\d+)\]:\s*(.+?)(?=\n\[\^|\n\n|\Z)" , full_content , re .DOTALL
79+ )
9180
92- content = self . res [ key ][ "response_content" ]
93- footnotes = self . res [ key ][ "footnotes" ]
81+ logger . info ( f"找到脚注引用: { set ( footnote_refs ) } " )
82+ logger . info ( f"找到脚注定义: { [ def_num for def_num , _ in footnote_defs ] } " )
9483
95- # 更新内容中的引用编号
96- if footnotes :
97- logger .debug (f"更新 { key } 的内容和脚注" )
98- # 更新正文中的引用
99- for old_num , new_num in footnote_mapping .items ():
100- content = content .replace (f"[^{ old_num } ]" , f"[^{ new_num } ]" )
84+ # 创建脚注映射和内容
85+ footnote_mapping = {}
86+ footnote_contents = {}
87+ footnote_counter = 1
10188
102- # 更新脚注
103- updated_footnotes = []
104- for num , content in footnotes : # 直接解构元组
105- new_num = footnote_mapping [num ]
106- updated_footnote = f"[^{ new_num } ]: { content .strip ()} "
107- updated_footnotes .append (updated_footnote )
89+ # 收集所有唯一的脚注编号(来自引用和定义)
90+ all_footnote_nums = set (footnote_refs )
91+ for def_num , def_content in footnote_defs :
92+ all_footnote_nums .add (def_num )
93+ footnote_contents [def_num ] = def_content .strip ()
10894
109- all_footnotes .extend (updated_footnotes )
95+ # 为每个脚注分配新编号
96+ for old_num in sorted (all_footnote_nums , key = int ):
97+ footnote_mapping [old_num ] = str (footnote_counter )
98+ footnote_counter += 1
11099
111- all_content . append ( content )
100+ logger . info ( f"脚注映射: { footnote_mapping } " )
112101
113- # 合并所有内容和脚注
114- final_content = "\n " .join (all_content )
115- if all_footnotes :
116- # 对脚注按编号排序
117- sorted_footnotes = sorted (
118- all_footnotes , key = lambda x : int (re .search (r"\[\^(\d+)\]:" , x ).group (1 ))
102+ # 更新正文中的脚注引用编号
103+ processed_content = full_content
104+ for old_num , new_num in footnote_mapping .items ():
105+ processed_content = processed_content .replace (
106+ f"[^{ old_num } ]" , f"[^{ new_num } ]"
119107 )
120- final_content += "\n \n " + "\n " .join (sorted_footnotes )
121108
122- logger .info (f"结果处理完成,最终内容长度: { len (final_content )} " )
123- return final_content
109+ # 移除原有的脚注定义(它们会被重新添加到最后)
110+ processed_content = re .sub (
111+ r"\[\^\d+\]:\s*.+?(?=\n\[\^|\n\n|\Z)" ,
112+ "" ,
113+ processed_content ,
114+ flags = re .DOTALL ,
115+ )
116+
117+ # 清理多余的空行
118+ processed_content = re .sub (r"\n{3,}" , "\n \n " , processed_content )
119+
120+ # 添加统一的参考文献部分
121+ if footnote_mapping :
122+ processed_content += "\n \n ## 参考文献\n \n "
123+
124+ # 按新编号顺序添加脚注
125+ for old_num in sorted (
126+ footnote_mapping .keys (), key = lambda x : int (footnote_mapping [x ])
127+ ):
128+ new_num = footnote_mapping [old_num ]
129+ if old_num in footnote_contents :
130+ processed_content += f"[^{ new_num } ]: { footnote_contents [old_num ]} \n "
131+ else :
132+ logger .warning (f"脚注 { old_num } 被引用但未找到定义" )
133+
134+ logger .info (f"参考文献部分添加完成,共有 { len (footnote_mapping )} 个脚注" )
135+
136+ logger .info (f"结果处理完成,最终内容长度: { len (processed_content )} " )
137+ return processed_content
124138
125139 def save_result (self , ques_count ):
126140 res_path = os .path .join (self .work_dir , "res.md" )
0 commit comments