Skip to content

Commit 81d0ed8

Browse files
committed
fix(generation_service): ensure processed chunks are incremented regardless of question generation success
1 parent 9775425 commit 81d0ed8

File tree

2 files changed

+33
-23
lines changed

2 files changed

+33
-23
lines changed

runtime/datamate-python/app/module/generation/service/generation_service.py

Lines changed: 30 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -202,9 +202,20 @@ async def _process_single_chunk_qa(
202202
chunk_index = chunk.chunk_index
203203
chunk_text = chunk.chunk_content or ""
204204
if not chunk_text.strip():
205-
logger.warning(f"Empty chunk text for file_task={file_task.id}, chunk_index={chunk_index}")
205+
logger.warning(
206+
f"Empty chunk text for file_task={file_task.id}, chunk_index={chunk_index}"
207+
)
208+
# 无论成功或失败,均视为该 chunk 已处理完成
209+
try:
210+
await self._increment_processed_chunks(file_task.id, 1)
211+
except Exception as e:
212+
logger.exception(
213+
f"Failed to increment processed_chunks for file_task={file_task.id}, chunk_index={chunk_index}: {e}"
214+
)
206215
return False
207216

217+
success_any = False
218+
208219
# 1. 生成问题
209220
try:
210221
questions = await self._generate_questions_for_one_chunk(
@@ -216,31 +227,30 @@ async def _process_single_chunk_qa(
216227
logger.error(
217228
f"Generate questions failed for file_task={file_task.id}, chunk_index={chunk_index}: {e}"
218229
)
219-
return False
230+
questions = []
220231

221232
if not questions:
222233
logger.info(
223234
f"No questions generated for file_task={file_task.id}, chunk_index={chunk_index}"
224235
)
225-
return False
226-
227-
# 2. 针对每个问题生成答案并入库
228-
success_any = await self._generate_answers_for_one_chunk(
229-
file_task=file_task,
230-
chunk=chunk,
231-
questions=questions,
232-
answer_cfg=answer_cfg,
233-
answer_chat=answer_chat,
234-
)
236+
else:
237+
# 2. 针对每个问题生成答案并入库
238+
qa_success = await self._generate_answers_for_one_chunk(
239+
file_task=file_task,
240+
chunk=chunk,
241+
questions=questions,
242+
answer_cfg=answer_cfg,
243+
answer_chat=answer_chat,
244+
)
245+
success_any = bool(qa_success)
235246

236-
# 每次处理完一个chunk,若至少生成一条QA,则安全更新已处理的chunk数量,避免并发冲突
237-
if success_any:
238-
try:
239-
await self._increment_processed_chunks(file_task.id, 1)
240-
except Exception as e:
241-
logger.exception(
242-
f"Failed to increment processed_chunks for file_task={file_task.id}, chunk_index={chunk_index}: {e}"
243-
)
247+
# 无论本 chunk 处理是否成功,都增加 processed_chunks 计数,避免任务长时间卡住
248+
try:
249+
await self._increment_processed_chunks(file_task.id, 1)
250+
except Exception as e:
251+
logger.exception(
252+
f"Failed to increment processed_chunks for file_task={file_task.id}, chunk_index={chunk_index}: {e}"
253+
)
244254

245255
return success_any
246256

runtime/datamate-python/app/module/generation/service/prompt.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@
2525
1. 所有问题必须严格依据原文内容,不得添加外部信息或假设情境。
2626
2. 问题需覆盖文本的不同主题、层级或视角,避免集中于单一片段。
2727
3. 禁止输出与材料元信息相关的问题(如作者、章节、目录等)。
28-
4. 问题不得包含“报告/文章/文献/表格中提到”等表述,需自然流畅
29-
5. 输出不少于 {{number}} 个问题,且保持格式一致
28+
4. 提问时请假设没有相应的文章可供参考,因此不要在问题中使用"这个"或"这些"等指示代词,也不得包含“报告/文章/文献/表格中提到”等表述。
29+
5. 输出不少于 {{number}} 个问题,问题语言与原文主要语言保持一致
3030
3131
## Output Format:
3232
- 使用合法的 JSON 数组,仅包含字符串元素。
@@ -38,7 +38,7 @@
3838
3939
## Output Example:
4040
```
41-
["人工智能伦理框架应包含哪些核心要素?", "民法典对个人数据保护有哪些新规定"]
41+
["人工智能伦理框架应包含哪些核心要素?", "民法典对个人数据保护有哪些新规定"]
4242
```
4343
4444
## Text to Analyze:

0 commit comments

Comments
 (0)