Skip to content

Commit da7e9b1

Browse files
authored
fix: 修复文档状态部分问题 (#1699)
1 parent a37a618 commit da7e9b1

File tree

8 files changed

+98
-27
lines changed

8 files changed

+98
-27
lines changed

apps/common/event/listener_manage.py

Lines changed: 13 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -181,7 +181,8 @@ def get_aggregation_document_status_by_dataset_id(dataset_id):
181181
def aggregation_document_status():
182182
sql = get_file_content(
183183
os.path.join(PROJECT_DIR, "apps", "dataset", 'sql', 'update_document_status_meta.sql'))
184-
native_update({'document_custom_sql': QuerySet(Document).filter(dataset_id=dataset_id)}, sql)
184+
native_update({'document_custom_sql': QuerySet(Document).filter(dataset_id=dataset_id)}, sql,
185+
with_table_name=True)
185186

186187
return aggregation_document_status
187188

@@ -190,7 +191,7 @@ def get_aggregation_document_status_by_query_set(queryset):
190191
def aggregation_document_status():
191192
sql = get_file_content(
192193
os.path.join(PROJECT_DIR, "apps", "dataset", 'sql', 'update_document_status_meta.sql'))
193-
native_update({'document_custom_sql': queryset}, sql)
194+
native_update({'document_custom_sql': queryset}, sql, with_table_name=True)
194195

195196
return aggregation_document_status
196197

@@ -249,19 +250,23 @@ def embedding_by_document(document_id, embedding_model: Embeddings):
249250
"""
250251
if not try_lock('embedding' + str(document_id)):
251252
return
252-
max_kb.info(f"开始--->向量化文档:{document_id}")
253-
# 批量修改状态为PADDING
254-
ListenerManagement.update_status(QuerySet(Document).filter(id=document_id), TaskType.EMBEDDING, State.STARTED)
255253
try:
256-
# 删除文档向量数据
257-
VectorStore.get_embedding_vector().delete_by_document_id(document_id)
258-
259254
def is_the_task_interrupted():
260255
document = QuerySet(Document).filter(id=document_id).first()
261256
if document is None or Status(document.status)[TaskType.EMBEDDING] == State.REVOKE:
262257
return True
263258
return False
264259

260+
if is_the_task_interrupted():
261+
return
262+
max_kb.info(f"开始--->向量化文档:{document_id}")
263+
# 批量修改状态为PADDING
264+
ListenerManagement.update_status(QuerySet(Document).filter(id=document_id), TaskType.EMBEDDING,
265+
State.STARTED)
266+
267+
# 删除文档向量数据
268+
VectorStore.get_embedding_vector().delete_by_document_id(document_id)
269+
265270
# 根据段落进行向量化处理
266271
page(QuerySet(Paragraph).filter(document_id=document_id).values('id'), 5,
267272
ListenerManagement.get_embedding_paragraph_apply(embedding_model, is_the_task_interrupted,

apps/dataset/migrations/0011_document_status_meta_paragraph_status_meta_and_more.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,11 @@
77
from common.event import ListenerManagement
88
from dataset.models import State, TaskType
99

10+
sql = """
11+
UPDATE "document"
12+
SET status ="replace"(status, '1', '3')
13+
"""
14+
1015

1116
def updateDocumentStatus(apps, schema_editor):
1217
ParagraphModel = apps.get_model('dataset', 'Paragraph')
@@ -43,5 +48,6 @@ class Migration(migrations.Migration):
4348
name='status',
4449
field=models.CharField(default=dataset.models.data_set.Status.__str__, max_length=20, verbose_name='状态'),
4550
),
51+
migrations.RunSQL(sql),
4652
migrations.RunPython(updateDocumentStatus)
4753
]

apps/dataset/serializers/document_serializers.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -297,6 +297,9 @@ def migrate(self, with_valid=True):
297297
ListenerManagement.update_status(QuerySet(Document).filter(id__in=document_id_list),
298298
TaskType.EMBEDDING,
299299
State.PENDING)
300+
ListenerManagement.update_status(QuerySet(Paragraph).filter(document_id__in=document_id_list),
301+
TaskType.EMBEDDING,
302+
State.PENDING)
300303
embedding_by_document_list.delay(document_id_list, model_id)
301304
else:
302305
update_embedding_dataset_id(pid_list, target_dataset_id)

apps/dataset/task/generate.py

Lines changed: 19 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -51,21 +51,28 @@ def generate_problem(paragraph_list):
5151
return generate_problem
5252

5353

54+
def get_is_the_task_interrupted(document_id):
55+
def is_the_task_interrupted():
56+
document = QuerySet(Document).filter(id=document_id).first()
57+
if document is None or Status(document.status)[TaskType.GENERATE_PROBLEM] == State.REVOKE:
58+
return True
59+
return False
60+
61+
return is_the_task_interrupted
62+
63+
5464
@celery_app.task(base=QueueOnce, once={'keys': ['document_id']},
5565
name='celery:generate_related_by_document')
5666
def generate_related_by_document_id(document_id, model_id, prompt):
5767
try:
68+
is_the_task_interrupted = get_is_the_task_interrupted(document_id)
69+
if is_the_task_interrupted():
70+
return
5871
ListenerManagement.update_status(QuerySet(Document).filter(id=document_id),
5972
TaskType.GENERATE_PROBLEM,
6073
State.STARTED)
6174
llm_model = get_llm_model(model_id)
6275

63-
def is_the_task_interrupted():
64-
document = QuerySet(Document).filter(id=document_id).first()
65-
if document is None or Status(document.status)[TaskType.GENERATE_PROBLEM] == State.REVOKE:
66-
return True
67-
return False
68-
6976
# 生成问题函数
7077
generate_problem = get_generate_problem(llm_model, prompt,
7178
ListenerManagement.get_aggregation_document_status(
@@ -82,6 +89,12 @@ def is_the_task_interrupted():
8289
name='celery:generate_related_by_paragraph_list')
8390
def generate_related_by_paragraph_id_list(document_id, paragraph_id_list, model_id, prompt):
8491
try:
92+
is_the_task_interrupted = get_is_the_task_interrupted(document_id)
93+
if is_the_task_interrupted():
94+
ListenerManagement.update_status(QuerySet(Document).filter(id=document_id),
95+
TaskType.GENERATE_PROBLEM,
96+
State.REVOKED)
97+
return
8598
ListenerManagement.update_status(QuerySet(Document).filter(id=document_id),
8699
TaskType.GENERATE_PROBLEM,
87100
State.STARTED)

apps/embedding/task/embedding.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -102,6 +102,7 @@ def embedding_by_dataset(dataset_id, model_id):
102102
max_kb.info(f"数据集文档:{[d.name for d in document_list]}")
103103
for document in document_list:
104104
try:
105+
print(document.id, model_id)
105106
embedding_by_document.delay(document.id, model_id)
106107
except Exception as e:
107108
pass

apps/smartdoc/settings/lib.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -32,9 +32,11 @@
3232
CELERY_WORKER_REDIRECT_STDOUTS_LEVEL = "INFO"
3333
CELERY_TASK_SOFT_TIME_LIMIT = 3600
3434
CELERY_WORKER_CANCEL_LONG_RUNNING_TASKS_ON_CONNECTION_LOSS = True
35+
CELERY_ACKS_LATE = True
36+
celery_once_path = os.path.join(celery_data_dir, "celery_once")
3537
CELERY_ONCE = {
3638
'backend': 'celery_once.backends.File',
37-
'settings': {'location': os.path.join(celery_data_dir, "celery_once")}
39+
'settings': {'location': celery_once_path}
3840
}
3941
CELERY_BROKER_CONNECTION_RETRY_ON_STARTUP = True
4042
CELERY_LOG_DIR = os.path.join(PROJECT_DIR, 'logs', 'celery')

ui/src/views/document/component/StatusTable.vue

Lines changed: 13 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -24,13 +24,19 @@
2424
</el-text>
2525
</el-col>
2626
<el-col :span="7">
27-
完成
28-
{{
29-
Object.keys(status.aggs ? status.aggs : {})
30-
.filter((k) => k == State.SUCCESS)
31-
.map((k) => status.aggs[k])
32-
.reduce((x: any, y: any) => x + y, 0)
33-
}}/{{ Object.values(status.aggs ? status.aggs : {}).reduce((x: any, y: any) => x + y, 0) }}
27+
<span
28+
:style="{ color: [State.FAILURE, State.REVOKED].includes(status.state) ? '#F54A45' : '' }"
29+
>
30+
完成
31+
{{
32+
Object.keys(status.aggs ? status.aggs : {})
33+
.filter((k) => k == State.SUCCESS)
34+
.map((k) => status.aggs[k])
35+
.reduce((x: any, y: any) => x + y, 0)
36+
}}/{{
37+
Object.values(status.aggs ? status.aggs : {}).reduce((x: any, y: any) => x + y, 0)
38+
}}</span
39+
>
3440
</el-col>
3541
<el-col :span="9">
3642
{{

ui/src/views/document/index.vue

Lines changed: 40 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -235,7 +235,25 @@
235235
<template #default="{ row }">
236236
<div v-if="datasetDetail.type === '0'">
237237
<span class="mr-4">
238-
<el-tooltip effect="dark" content="向量化" placement="top">
238+
<el-tooltip
239+
effect="dark"
240+
v-if="
241+
([State.STARTED, State.PENDING] as Array<string>).includes(
242+
getTaskState(row.status, TaskType.EMBEDDING)
243+
)
244+
"
245+
content="取消向量化"
246+
placement="top"
247+
>
248+
<el-button
249+
type="primary"
250+
text
251+
@click.stop="cancelTask(row, TaskType.EMBEDDING)"
252+
>
253+
<AppIcon iconName="app-close" style="font-size: 16px"></AppIcon>
254+
</el-button>
255+
</el-tooltip>
256+
<el-tooltip v-else effect="dark" content="向量化" placement="top">
239257
<el-button type="primary" text @click.stop="refreshDocument(row)">
240258
<AppIcon iconName="app-document-refresh" style="font-size: 16px"></AppIcon>
241259
</el-button>
@@ -255,9 +273,20 @@
255273
</el-button>
256274
<template #dropdown>
257275
<el-dropdown-menu>
258-
<el-dropdown-item @click="openGenerateDialog(row)">
276+
<el-dropdown-item
277+
v-if="
278+
([State.STARTED, State.PENDING] as Array<string>).includes(
279+
getTaskState(row.status, TaskType.GENERATE_PROBLEM)
280+
)
281+
"
282+
@click="cancelTask(row, TaskType.GENERATE_PROBLEM)"
283+
>
259284
<el-icon><Connection /></el-icon>
260-
生成关联问题
285+
取消生成问题
286+
</el-dropdown-item>
287+
<el-dropdown-item v-else @click="openGenerateDialog(row)">
288+
<el-icon><Connection /></el-icon>
289+
生成问题
261290
</el-dropdown-item>
262291
<el-dropdown-item @click="openDatasetDialog(row)">
263292
<AppIcon iconName="app-migrate"></AppIcon>
@@ -286,7 +315,11 @@
286315
<span class="mr-4">
287316
<el-tooltip
288317
effect="dark"
289-
v-if="getTaskState(row.status, TaskType.EMBEDDING) == State.STARTED"
318+
v-if="
319+
([State.STARTED, State.PENDING] as Array<string>).includes(
320+
getTaskState(row.status, TaskType.EMBEDDING)
321+
)
322+
"
290323
content="取消向量化"
291324
placement="top"
292325
>
@@ -318,7 +351,9 @@
318351
>
319352
<el-dropdown-item
320353
v-if="
321-
getTaskState(row.status, TaskType.GENERATE_PROBLEM) == State.STARTED
354+
([State.STARTED, State.PENDING] as Array<string>).includes(
355+
getTaskState(row.status, TaskType.GENERATE_PROBLEM)
356+
)
322357
"
323358
@click="cancelTask(row, TaskType.GENERATE_PROBLEM)"
324359
>

0 commit comments

Comments
 (0)