|
3 | 3 | from app.utils.data_recorder import DataRecorder |
4 | 4 | from app.schemas.A2A import WriterResponse |
5 | 5 | from app.utils.log_util import logger |
| 6 | +from app.utils.common_utils import split_footnotes |
| 7 | +import json |
6 | 8 |
|
7 | 9 |
|
8 | 10 | class UserOutput: |
@@ -42,101 +44,71 @@ def get_model_build_solve(self) -> str: |
42 | 44 | return model_build_solve |
43 | 45 |
|
44 | 46 | def get_result_to_save(self, ques_count): |
| 47 | + # 按章节顺序拼接各部分内容并统一整理脚注,返回将写入 res.md 的完整文本(res.json 的保存在 save_result 中完成) |
| 48 | + |
45 | 49 | logger.info(f"开始处理结果保存,问题数量: {ques_count}") |
| 50 | + |
46 | 51 | # 动态顺序获取拼接res value,正确拼接顺序 |
47 | 52 | ques_str = [f"ques{i}" for i in range(1, ques_count + 1)] |
| 53 | + |
| 54 | + # 修改:调整章节顺序,确保符合论文结构 |
48 | 55 | seq = [ |
49 | | - "firstPage", |
50 | | - "RepeatQues", |
51 | | - "analysisQues", |
52 | | - "modelAssumption", |
53 | | - "symbol", |
54 | | - "eda", |
55 | | - *ques_str, |
56 | | - "sensitivity_analysis", |
57 | | - "judge", |
| 56 | + "firstPage", # 标题、摘要、关键词 |
| 57 | + "RepeatQues", # 一、问题重述 |
| 58 | + "analysisQues", # 二、问题分析 |
| 59 | + "modelAssumption", # 三、模型假设 |
| 60 | + "symbol", # 四、符号说明和数据预处理 |
| 61 | + "eda", # 四、数据预处理(EDA部分) |
| 62 | + *ques_str, # 五、模型的建立与求解(问题1、2...) |
| 63 | + "sensitivity_analysis", # 六、模型的分析与检验 |
| 64 | + "judge", # 七、模型的评价、改进与推广 |
58 | 65 | ] |
59 | | - logger.debug(f"处理序列: {seq}") |
60 | 66 |
|
61 | | - # 收集所有内容 |
| 67 | + # 用于存储所有脚注 |
| 68 | + all_footnotes: dict[str, str] = {} |
| 69 | + # 收集所有内容和处理脚注 |
62 | 70 | all_content = [] |
| 71 | + |
63 | 72 | for key in seq: |
64 | 73 | if key not in self.res: |
65 | 74 | logger.debug(f"跳过不存在的键: {key}") |
66 | 75 | continue |
| 76 | + |
67 | 77 | content = self.res[key]["response_content"] |
68 | | - all_content.append(content) |
| 78 | + # 分离正文和脚注 |
| 79 | + main_text, footnotes = split_footnotes(content) |
| 80 | + # 存储脚注内容(去重) |
| 81 | + for _, note_content in footnotes: |
| 82 | + all_footnotes[note_content] = note_content |
| 83 | + all_content.append(main_text) |
69 | 84 |
|
70 | 85 | # 合并所有内容 |
71 | 86 | full_content = "\n".join(all_content) |
72 | 87 |
|
73 | | - # 提取所有脚注引用 [^1], [^2] 等 |
74 | | - footnote_refs = re.findall(r"\[\^(\d+)\]", full_content) |
75 | | - |
76 | | - # 提取所有脚注定义 [^1]: 内容 |
77 | | - footnote_defs = re.findall( |
78 | | - r"\[\^(\d+)\]:\s*(.+?)(?=\n\[\^|\n\n|\Z)", full_content, re.DOTALL |
79 | | - ) |
80 | | - |
81 | | - logger.info(f"找到脚注引用: {set(footnote_refs)}") |
82 | | - logger.info(f"找到脚注定义: {[def_num for def_num, _ in footnote_defs]}") |
83 | | - |
84 | | - # 创建脚注映射和内容 |
85 | | - footnote_mapping = {} |
86 | | - footnote_contents = {} |
87 | | - footnote_counter = 1 |
88 | | - |
89 | | - # 收集所有唯一的脚注编号(来自引用和定义) |
90 | | - all_footnote_nums = set(footnote_refs) |
91 | | - for def_num, def_content in footnote_defs: |
92 | | - all_footnote_nums.add(def_num) |
93 | | - footnote_contents[def_num] = def_content.strip() |
94 | | - |
95 | | - # 为每个脚注分配新编号 |
96 | | - for old_num in sorted(all_footnote_nums, key=int): |
97 | | - footnote_mapping[old_num] = str(footnote_counter) |
98 | | - footnote_counter += 1 |
99 | | - |
100 | | - logger.info(f"脚注映射: {footnote_mapping}") |
101 | | - |
102 | | - # 更新正文中的脚注引用编号 |
103 | | - processed_content = full_content |
104 | | - for old_num, new_num in footnote_mapping.items(): |
105 | | - processed_content = processed_content.replace( |
106 | | - f"[^{old_num}]", f"[^{new_num}]" |
107 | | - ) |
108 | | - |
109 | | - # 移除原有的脚注定义(它们会被重新添加到最后) |
110 | | - processed_content = re.sub( |
111 | | - r"\[\^\d+\]:\s*.+?(?=\n\[\^|\n\n|\Z)", |
112 | | - "", |
113 | | - processed_content, |
114 | | - flags=re.DOTALL, |
115 | | - ) |
| 88 | + # 重新编号脚注引用 |
| 89 | + footnote_mapping = {} # 脚注内容到新编号的映射(键是去重后的脚注文本,而不是旧编号) |
| 90 | + for i, content in enumerate(all_footnotes.values(), 1): |
| 91 | + footnote_mapping[content] = str(i) |
116 | 92 |
|
117 | | - # 清理多余的空行 |
118 | | - processed_content = re.sub(r"\n{3,}", "\n\n", processed_content) |
| 93 | + # 更新正文中的脚注引用 |
| 94 | + for old_content, new_num in footnote_mapping.items(): |
| 95 | + # 在正文中查找并替换脚注引用 |
| 96 | + pattern = r"\[\^\d+\]" |
| 97 | + # 注意:模式与 old_content 无关,count=1 每轮只替换全文最左侧的一个 [^数字] 引用;上一轮写入的 [^新编号] 仍会被再次匹配,因此每轮改写的都是同一个引用,其余引用保持原编号 |
| 98 | + full_content = re.sub(pattern, f"[^{new_num}]", full_content, count=1) |
119 | 99 |
|
120 | | - # 添加统一的参考文献部分 |
121 | | - if footnote_mapping: |
122 | | - processed_content += "\n\n## 参考文献\n\n" |
| 100 | + # 添加重新编号后的脚注到文档末尾 |
| 101 | + if all_footnotes: |
| 102 | + full_content += "\n\n## 参考文献\n\n" |
| 103 | + for content, num in footnote_mapping.items(): |
| 104 | + full_content += f"[^{num}]: {content}\n\n" |
123 | 105 |
|
124 | | - # 按新编号顺序添加脚注 |
125 | | - for old_num in sorted( |
126 | | - footnote_mapping.keys(), key=lambda x: int(footnote_mapping[x]) |
127 | | - ): |
128 | | - new_num = footnote_mapping[old_num] |
129 | | - if old_num in footnote_contents: |
130 | | - processed_content += f"[^{new_num}]: {footnote_contents[old_num]}\n" |
131 | | - else: |
132 | | - logger.warning(f"脚注 {old_num} 被引用但未找到定义") |
133 | | - |
134 | | - logger.info(f"参考文献部分添加完成,共有 {len(footnote_mapping)} 个脚注") |
135 | | - |
136 | | - logger.info(f"结果处理完成,最终内容长度: {len(processed_content)}") |
137 | | - return processed_content |
| 106 | + return full_content |
138 | 107 |
|
139 | 108 | def save_result(self, ques_count): |
| 109 | + with open(os.path.join(self.work_dir, "res.json"), "w", encoding="utf-8") as f: |
| 110 | + json.dump(self.res, f, ensure_ascii=False, indent=4) |
| 111 | + |
140 | 112 | res_path = os.path.join(self.work_dir, "res.md") |
141 | 113 | with open(res_path, "w", encoding="utf-8") as f: |
142 | 114 | f.write(self.get_result_to_save(ques_count)) |
0 commit comments