11import shutil
22from data_celery .main import celery_app
3- import time ,os ,json
3+ import time , os , json
44from data_server .database .session import get_sync_session
55from sqlalchemy .orm import Session
6- from data_server .datasource .DatasourceModels import CollectionTask ,DataSourceTaskStatusEnum ,DataSourceTypeEnum
6+ from data_server .datasource .DatasourceModels import CollectionTask , DataSourceTaskStatusEnum , DataSourceTypeEnum
77from data_celery .db .DatasourceManager import get_collection_task_by_uid
88from data_celery .utils import (ensure_directory_exists ,
99 get_current_ip , get_current_time , get_datasource_temp_parquet_dir ,
1010 ensure_directory_exists_remove , get_datasource_csg_hub_server_dir )
1111from data_server .datasource .services .datasource import get_datasource_connector
12- from data_celery .mongo_tools .tools import insert_datasource_run_task_log_info ,insert_datasource_run_task_log_error
12+ from data_celery .mongo_tools .tools import insert_datasource_run_task_log_info , insert_datasource_run_task_log_error
1313from data_engine .exporter .load import load_exporter
1414from pathlib import Path
1515import pandas as pd
1616from loguru import logger
1717
18+ # Import BSON types for MongoDB ObjectId conversion
19+ from datetime import datetime , date
20+
# Optional BSON support: pymongo may not be installed in every worker image,
# so probe for it once at import time instead of failing hard.
try:
    from bson import ObjectId
    from bson.errors import InvalidId  # imported to verify bson.errors is present
except ImportError:
    ObjectId = None
    BSON_AVAILABLE = False
else:
    BSON_AVAILABLE = True


def convert_mongo_document(doc):
    """
    Recursively convert a MongoDB document into JSON-serializable data.

    ObjectId values become their string form, datetime/date values become
    ISO-8601 strings, dicts and lists are walked recursively, and every
    other value is returned unchanged.
    """
    if isinstance(doc, dict):
        converted = {}
        for key, value in doc.items():
            converted[key] = convert_mongo_document(value)
        return converted
    if isinstance(doc, list):
        return [convert_mongo_document(item) for item in doc]
    # Only attempt the ObjectId check when bson is importable.
    if BSON_AVAILABLE and isinstance(doc, ObjectId):
        return str(doc)
    if isinstance(doc, (datetime, date)):
        return doc.isoformat()
    return doc
46+
1847
1948@celery_app .task (name = "collection_mongo_task" )
20- def collection_mongo_task (task_uid : str ,user_name : str ,user_token : str ):
49+ def collection_mongo_task (task_uid : str , user_name : str , user_token : str ):
2150 """
2251 Collection task
2352 Args:
@@ -64,7 +93,7 @@ def collection_mongo_task(task_uid: str,user_name: str,user_token: str):
6493 collection_task .start_run_at = get_current_time ()
6594 db_session .commit ()
6695 # Read data source
67- extra_config = collection_task .datasource .extra_config
96+ extra_config = json . loads ( collection_task .datasource .extra_config )
6897 if not extra_config :
6998 collection_task .task_status = DataSourceTaskStatusEnum .ERROR .value
7099 insert_datasource_run_task_log_error (task_uid , f"Task with UID { task_uid } has no extra configuration." )
@@ -76,13 +105,19 @@ def collection_mongo_task(task_uid: str,user_name: str,user_token: str):
76105 return False
77106 mongo_config = extra_config ["mongo" ]
78107 max_line = 10000
79- csg_hub_dataset_id = 0
108+ csg_hub_dataset_id = ''
80109 csg_hub_dataset_default_branch = "main"
81110 if "csg_hub_dataset_default_branch" in extra_config :
82111 csg_hub_dataset_default_branch = extra_config ["csg_hub_dataset_default_branch" ]
83- if "csg_hub_dataset_id" in extra_config and isinstance ( extra_config [ 'csg_hub_dataset_id' ], int ) :
112+ if "csg_hub_dataset_id" in extra_config :
84113 csg_hub_dataset_id = extra_config ["csg_hub_dataset_id" ]
85- if csg_hub_dataset_id <= 0 :
114+ # Read csg_hub_dataset_name if provided, otherwise use default branch
115+ csg_hub_dataset_name = None
116+ if "csg_hub_dataset_name" in extra_config and extra_config ['csg_hub_dataset_name' ] != '' :
117+ csg_hub_dataset_name = extra_config ["csg_hub_dataset_name" ]
118+ else :
119+ csg_hub_dataset_name = csg_hub_dataset_default_branch
120+ if csg_hub_dataset_id is None or csg_hub_dataset_id == '' :
86121 collection_task .task_status = DataSourceTaskStatusEnum .ERROR .value
87122 insert_datasource_run_task_log_error (task_uid , f"Task with UID { task_uid } has no CSG Hub Dataset ID." )
88123 return False
@@ -96,7 +131,8 @@ def collection_mongo_task(task_uid: str,user_name: str,user_token: str):
96131 connector = get_datasource_connector (collection_task .datasource )
97132 if not connector .test_connection ():
98133 collection_task .task_status = DataSourceTaskStatusEnum .ERROR .value
99- insert_datasource_run_task_log_error (task_uid , f"Task with UID { task_uid } failed to connect to the database." )
134+ insert_datasource_run_task_log_error (task_uid ,
135+ f"Task with UID { task_uid } failed to connect to the database." )
100136 return False
101137
102138 total_count = 0
@@ -117,11 +153,22 @@ def collection_mongo_task(task_uid: str,user_name: str,user_token: str):
117153 while True :
118154 # Execute pagination query (specific implementation depends on connector details)
119155 rows = connector .query_collection (collection_name , offset = (page - 1 ) * page_size ,
120- limit = page_size )
156+ limit = page_size )
121157
122158 if not rows :
123159 break # If there is no more data, exit the loop
124160
161+ # Add rows to buffer, converting MongoDB types to JSON-serializable format
162+ if isinstance (rows , list ):
163+ # Convert each document to handle ObjectId and other BSON types
164+ converted_rows = [convert_mongo_document (row ) for row in rows ]
165+ rows_buffer .extend (converted_rows )
166+ else :
167+ # If rows is a generator or iterator, convert to list first
168+ rows_list = list (rows )
169+ converted_rows = [convert_mongo_document (row ) for row in rows_list ]
170+ rows_buffer .extend (converted_rows )
171+
125172 # If the number of rows in the buffer list reaches or exceeds the maximum number of rows, write to the file and clear the buffer list
126173 if len (rows_buffer ) >= max_line :
127174 file_path = os .path .join (table_dir , f"data_{ file_index :04d} .parquet" )
@@ -130,7 +177,8 @@ def collection_mongo_task(task_uid: str,user_name: str,user_token: str):
130177 current_file_row_count += len (rows_buffer )
131178 records_count += len (rows_buffer )
132179 collection_task .records_count = records_count
133- insert_datasource_run_task_log_info (task_uid , f"Task with UID { task_uid } get data count { records_count } ..." )
180+ insert_datasource_run_task_log_info (task_uid ,
181+ f"Task with UID { task_uid } get data count { records_count } ..." )
134182 db_session .commit ()
135183 file_index += 1
136184 rows_buffer = [] # Clear the buffer list
@@ -143,16 +191,18 @@ def collection_mongo_task(task_uid: str,user_name: str,user_token: str):
143191 current_file_row_count += len (rows_buffer )
144192 records_count += len (rows_buffer )
145193 collection_task .records_count = records_count
146- insert_datasource_run_task_log_info (task_uid , f"Task with UID { task_uid } get data count { records_count } ..." )
194+ insert_datasource_run_task_log_info (task_uid ,
195+ f"Task with UID { task_uid } get data count { records_count } ..." )
147196 db_session .commit ()
148197
149198 except Exception as e :
150- insert_datasource_run_task_log_error (task_uid , f"Task with UID { task_uid } failed to get collection document { collection_name } : { e } " )
199+ insert_datasource_run_task_log_error (task_uid ,
200+ f"Task with UID { task_uid } failed to get collection document { collection_name } : { e } " )
151201 collection_task .records_count = total_count
152202 collection_task .total_count = total_count
153203 db_session .commit ()
154204 upload_to_csg_hub_server (csg_hub_dataset_id ,
155- csg_hub_dataset_default_branch ,
205+ csg_hub_dataset_name ,
156206 user_name , user_token , db_session ,
157207 collection_task , datasource_temp_parquet_dir ,
158208 datasource_csg_hub_server_dir )
@@ -176,15 +226,15 @@ def collection_mongo_task(task_uid: str,user_name: str,user_token: str):
176226 return True
177227
178228
179- def upload_to_csg_hub_server (csg_hub_dataset_id : int ,
229+ def upload_to_csg_hub_server (csg_hub_dataset_id : str ,
180230 csg_hub_dataset_default_branch : str ,
181- user_name : str ,user_token : str ,db_session : Session ,
182- collection_task : CollectionTask ,datasource_temp_json_dir : str ,
231+ user_name : str , user_token : str , db_session : Session ,
232+ collection_task : CollectionTask , datasource_temp_json_dir : str ,
183233 datasource_csg_hub_server_dir : str ):
184234 """
185235 Upload to CSG Hub server
186236 Args:
187- csg_hub_dataset_id (int ): CSG Hub dataset ID
237+ csg_hub_dataset_id (str ): CSG Hub dataset ID
188238 csg_hub_dataset_default_branch (str): CSG Hub dataset default branch
189239 user_name (str): User name
190240 user_token (str): User token
@@ -198,26 +248,54 @@ def upload_to_csg_hub_server(csg_hub_dataset_id: int,
198248 # Upload to CSG Hub server
199249
200250 ensure_directory_exists_remove (datasource_csg_hub_server_dir )
201- insert_datasource_run_task_log_info (collection_task .task_uid , f"Starting upload csg hub-server the task[{ collection_task .task_uid } ]..." )
251+ insert_datasource_run_task_log_info (collection_task .task_uid ,
252+ f"Starting upload csg hub-server the task[{ collection_task .task_uid } ]..." )
202253 exporter = load_exporter (
203254 export_path = datasource_temp_json_dir ,
204- repo_id = str ( csg_hub_dataset_id ) ,
255+ repo_id = csg_hub_dataset_id ,
205256 branch = csg_hub_dataset_default_branch ,
206257 user_name = user_name ,
207258 user_token = user_token ,
208259 work_dir = datasource_csg_hub_server_dir
209260 )
210- upload_path : Path = Path (datasource_csg_hub_server_dir )
261+ upload_path : Path = Path (datasource_temp_json_dir )
262+ # Check whether the uploaded directory exists and is not empty
263+ if not os .path .exists (upload_path ):
264+ insert_datasource_run_task_log_error (collection_task .task_uid ,
265+ f"the task[{ collection_task .task_uid } ] upload csg hub-server fail: upload path { upload_path } does not exist" )
266+ return False
267+
268+ # List all files in the upload directory for debugging
269+ file_list = []
270+ for root , dirs , files in os .walk (upload_path ):
271+ for file in files :
272+ file_list .append (os .path .join (root , file ))
273+ insert_datasource_run_task_log_info (collection_task .task_uid ,
274+ f"Files to upload: { len (file_list )} files found in { upload_path } " )
275+ if len (file_list ) == 0 :
276+ insert_datasource_run_task_log_error (collection_task .task_uid ,
277+ f"the task[{ collection_task .task_uid } ] upload csg hub-server fail: upload path { upload_path } is empty" )
278+ return False
279+
211280 output_branch_name = exporter .export_from_files (upload_path )
212281
213282 if output_branch_name :
214283 collection_task .csg_hub_branch = output_branch_name
215284 db_session .commit ()
216- insert_datasource_run_task_log_info (collection_task .task_uid , f"the task[{ collection_task .task_uid } ] upload csg hub-server success..." )
285+ insert_datasource_run_task_log_info (collection_task .task_uid ,
286+ f"the task[{ collection_task .task_uid } ] upload csg hub-server success..." )
217287 else :
218- insert_datasource_run_task_log_error (collection_task .task_uid , f"the task[{ collection_task .task_uid } ] upload csg hub-server fail..." )
288+ insert_datasource_run_task_log_error (collection_task .task_uid ,
289+ f"the task[{ collection_task .task_uid } ] upload csg hub-server fail: export_from_files returned None" )
219290 except Exception as e :
220291 logger .error (e )
221- insert_datasource_run_task_log_error (collection_task .task_uid ,f"Task UID { collection_task .task_uid } Error occurred while uploading to CSG Hub server: { e } " )
292+ error_msg = str (e )
293+ # Check if this is a "nothing to commit" error
294+ if "nothing to commit" in error_msg .lower () or "working tree clean" in error_msg .lower ():
295+ insert_datasource_run_task_log_error (collection_task .task_uid ,
296+ f"the task[{ collection_task .task_uid } ] upload csg hub-server fail: No files to commit. This may happen if: 1) Files are already committed in the branch, 2) Files are ignored by .gitignore, 3) File paths are incorrect. Error: { error_msg } " )
297+ else :
298+ insert_datasource_run_task_log_error (collection_task .task_uid ,
299+ f"Task UID { collection_task .task_uid } Error occurred while uploading to CSG Hub server: { error_msg } " )
222300 return False
223301 return True
0 commit comments