Skip to content

Commit a603bb3

Browse files
committed
refactor: 优化大量数据批量入库
1 parent 20920e6 commit a603bb3

File tree

2 files changed

+13
-5
lines changed

2 files changed

+13
-5
lines changed

apps/common/util/common.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,3 +102,12 @@ def run(*args, **kwargs):
102102
return run
103103

104104
return inner
105+
106+
107+
def bulk_create_in_batches(model, data, batch_size=1000):
    """Insert ``data`` via ``model.objects.bulk_create`` in consecutive slices.

    The input is cut into slices of at most ``batch_size`` items, and each
    slice is handed to its own ``model.objects.bulk_create`` call, in order.

    Args:
        model: Object exposing ``objects.bulk_create(batch)``.
        data: Sliceable sequence of instances to insert. ``None`` or an
            empty sequence is a no-op.
        batch_size: Maximum number of items per ``bulk_create`` call; must
            be at least 1.

    Returns:
        None.

    Raises:
        ValueError: If ``data`` is non-empty and ``batch_size`` is below 1.
    """
    if not data:
        return
    # A negative step would make range() empty and silently skip every row,
    # so reject non-positive sizes explicitly instead of losing data.
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")
    for start in range(0, len(data), batch_size):
        model.objects.bulk_create(data[start:start + batch_size])
113+

apps/dataset/serializers/document_serializers.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
from common.handle.impl.table.xlsx_parse_table_handle import XlsxSplitHandle
4242
from common.handle.impl.text_split_handle import TextSplitHandle
4343
from common.mixins.api_mixin import ApiMixin
44-
from common.util.common import post, flat_map
44+
from common.util.common import post, flat_map, bulk_create_in_batches
4545
from common.util.field_message import ErrMessage
4646
from common.util.file_util import get_file_content
4747
from common.util.fork import Fork
@@ -955,12 +955,11 @@ def batch_save(self, instance_list: List[Dict], with_valid=True):
955955
# 插入文档
956956
QuerySet(Document).bulk_create(document_model_list) if len(document_model_list) > 0 else None
957957
# 批量插入段落
958-
QuerySet(Paragraph).bulk_create(paragraph_model_list) if len(paragraph_model_list) > 0 else None
958+
bulk_create_in_batches(Paragraph, paragraph_model_list, batch_size=1000)
959959
# 批量插入问题
960-
QuerySet(Problem).bulk_create(problem_model_list) if len(problem_model_list) > 0 else None
960+
bulk_create_in_batches(Problem, problem_model_list, batch_size=1000)
961961
# 批量插入关联问题
962-
QuerySet(ProblemParagraphMapping).bulk_create(problem_paragraph_mapping_list) if len(
963-
problem_paragraph_mapping_list) > 0 else None
962+
bulk_create_in_batches(ProblemParagraphMapping, problem_paragraph_mapping_list, batch_size=1000)
964963
# 查询文档
965964
query_set = QuerySet(model=Document)
966965
if len(document_model_list) == 0:

0 commit comments

Comments
 (0)