3838from dataset .serializers .common_serializers import list_paragraph , MetaSerializer , ProblemParagraphManage , \
3939 get_embedding_model_by_dataset_id , get_embedding_model_id_by_dataset_id
4040from dataset .serializers .document_serializers import DocumentSerializers , DocumentInstanceSerializer
41- from dataset .task import sync_web_dataset
41+ from dataset .task import sync_web_dataset , sync_replace_web_dataset
4242from embedding .models import SearchMode
4343from embedding .task import embedding_by_dataset , delete_embedding_by_dataset
4444from setting .models import AuthOperate
@@ -602,15 +602,18 @@ def handler(child_link: ChildLink, response: Fork.Response):
602602 document_name = child_link .tag .text if child_link .tag is not None and len (
603603 child_link .tag .text .strip ()) > 0 else child_link .url
604604 paragraphs = get_split_model ('web.md' ).parse (response .content )
605- first = QuerySet (Document ).filter (meta__source_url = child_link .url , dataset = dataset ).first ()
605+ print (child_link .url .strip ())
606+ first = QuerySet (Document ).filter (meta__source_url = child_link .url .strip (),
607+ dataset = dataset ).first ()
606608 if first is not None :
607609 # A document with this source URL already exists: re-sync it
608610 DocumentSerializers .Sync (data = {'document_id' : first .id }).sync ()
609611 else :
610612 # No existing document: insert a new one
611613 DocumentSerializers .Create (data = {'dataset_id' : dataset .id }).save (
612614 {'name' : document_name , 'paragraphs' : paragraphs ,
613- 'meta' : {'source_url' : child_link .url , 'selector' : dataset .meta .get ('selector' )},
615+ 'meta' : {'source_url' : child_link .url .strip (),
616+ 'selector' : dataset .meta .get ('selector' )},
614617 'type' : Type .web }, with_valid = True )
615618 except Exception as e :
616619 logging .getLogger ("max_kb_error" ).error (f'{ str (e )} :{ traceback .format_exc ()} ' )
@@ -624,7 +627,7 @@ def replace_sync(self, dataset):
624627 """
625628 url = dataset .meta .get ('source_url' )
626629 selector = dataset .meta .get ('selector' ) if 'selector' in dataset .meta else None
627- sync_web_dataset .delay (str (dataset .id ), url , selector )
630+ sync_replace_web_dataset .delay (str (dataset .id ), url , selector )
628631
629632 def complete_sync (self , dataset ):
630633 """
0 commit comments