
Commit c175f07

z275748353, zhanglongbin, and HaiHui886 authored
Add template deletion function (#37)
* 1. Add template deletion function
  2. Celery node deletion (admin only, offline status)
* Fix the bug of dataflow with ID #29
* Fix the bug of dataflow with ID #31
* Fix the bug of dataflow with ID #35
* Fix the bug of dataflow with ID #36

---------

Co-authored-by: zhanglongbin <[email protected]>
Co-authored-by: HaiHui <[email protected]>
1 parent 2c80d08 commit c175f07

File tree

8 files changed: +152 −40 lines changed

data_celery/datasource/mongo/tasks.py

Lines changed: 102 additions & 24 deletions

@@ -1,23 +1,52 @@
 import shutil
 from data_celery.main import celery_app
-import time,os,json
+import time, os, json
 from data_server.database.session import get_sync_session
 from sqlalchemy.orm import Session
-from data_server.datasource.DatasourceModels import CollectionTask,DataSourceTaskStatusEnum,DataSourceTypeEnum
+from data_server.datasource.DatasourceModels import CollectionTask, DataSourceTaskStatusEnum, DataSourceTypeEnum
 from data_celery.db.DatasourceManager import get_collection_task_by_uid
 from data_celery.utils import (ensure_directory_exists,
                                get_current_ip, get_current_time, get_datasource_temp_parquet_dir,
                                ensure_directory_exists_remove, get_datasource_csg_hub_server_dir)
 from data_server.datasource.services.datasource import get_datasource_connector
-from data_celery.mongo_tools.tools import insert_datasource_run_task_log_info,insert_datasource_run_task_log_error
+from data_celery.mongo_tools.tools import insert_datasource_run_task_log_info, insert_datasource_run_task_log_error
 from data_engine.exporter.load import load_exporter
 from pathlib import Path
 import pandas as pd
 from loguru import logger
 
+# Import BSON types for MongoDB ObjectId conversion
+from datetime import datetime, date
+
+try:
+    from bson import ObjectId
+    from bson.errors import InvalidId
+
+    BSON_AVAILABLE = True
+except ImportError:
+    BSON_AVAILABLE = False
+    ObjectId = None
+
+
+def convert_mongo_document(doc):
+    """
+    Convert MongoDB document to JSON-serializable format.
+    Handles ObjectId, datetime, and other BSON types.
+    """
+    if isinstance(doc, dict):
+        return {key: convert_mongo_document(value) for key, value in doc.items()}
+    elif isinstance(doc, list):
+        return [convert_mongo_document(item) for item in doc]
+    elif BSON_AVAILABLE and isinstance(doc, ObjectId):
+        return str(doc)
+    elif isinstance(doc, (datetime, date)):
+        return doc.isoformat()
+    else:
+        return doc
 
 
 @celery_app.task(name="collection_mongo_task")
-def collection_mongo_task(task_uid: str,user_name: str,user_token: str):
+def collection_mongo_task(task_uid: str, user_name: str, user_token: str):
     """
     Collection task
     Args:
@@ -64,7 +93,7 @@ def collection_mongo_task(task_uid: str,user_name: str,user_token: str):
         collection_task.start_run_at = get_current_time()
         db_session.commit()
         # Read data source
-        extra_config = collection_task.datasource.extra_config
+        extra_config = json.loads(collection_task.datasource.extra_config)
         if not extra_config:
             collection_task.task_status = DataSourceTaskStatusEnum.ERROR.value
             insert_datasource_run_task_log_error(task_uid, f"Task with UID {task_uid} has no extra configuration.")
@@ -76,13 +105,19 @@ def collection_mongo_task(task_uid: str,user_name: str,user_token: str):
             return False
         mongo_config = extra_config["mongo"]
         max_line = 10000
-        csg_hub_dataset_id = 0
+        csg_hub_dataset_id = ''
         csg_hub_dataset_default_branch = "main"
         if "csg_hub_dataset_default_branch" in extra_config:
             csg_hub_dataset_default_branch = extra_config["csg_hub_dataset_default_branch"]
-        if "csg_hub_dataset_id" in extra_config and isinstance(extra_config['csg_hub_dataset_id'], int):
+        if "csg_hub_dataset_id" in extra_config:
             csg_hub_dataset_id = extra_config["csg_hub_dataset_id"]
-        if csg_hub_dataset_id <= 0:
+        # Read csg_hub_dataset_name if provided, otherwise use default branch
+        csg_hub_dataset_name = None
+        if "csg_hub_dataset_name" in extra_config and extra_config['csg_hub_dataset_name'] != '':
+            csg_hub_dataset_name = extra_config["csg_hub_dataset_name"]
+        else:
+            csg_hub_dataset_name = csg_hub_dataset_default_branch
+        if csg_hub_dataset_id is None or csg_hub_dataset_id == '':
            collection_task.task_status = DataSourceTaskStatusEnum.ERROR.value
             insert_datasource_run_task_log_error(task_uid, f"Task with UID {task_uid} has no CSG Hub Dataset ID.")
             return False
@@ -96,7 +131,8 @@ def collection_mongo_task(task_uid: str,user_name: str,user_token: str):
         connector = get_datasource_connector(collection_task.datasource)
         if not connector.test_connection():
             collection_task.task_status = DataSourceTaskStatusEnum.ERROR.value
-            insert_datasource_run_task_log_error(task_uid, f"Task with UID {task_uid} failed to connect to the database.")
+            insert_datasource_run_task_log_error(task_uid,
+                                                 f"Task with UID {task_uid} failed to connect to the database.")
             return False
 
         total_count = 0
@@ -117,11 +153,22 @@ def collection_mongo_task(task_uid: str,user_name: str,user_token: str):
                 while True:
                     # Execute pagination query (specific implementation depends on connector details)
                     rows = connector.query_collection(collection_name, offset=(page - 1) * page_size,
-                                                    limit=page_size)
+                                                      limit=page_size)
 
                     if not rows:
                         break  # If there is no more data, exit the loop
 
+                    # Add rows to buffer, converting MongoDB types to JSON-serializable format
+                    if isinstance(rows, list):
+                        # Convert each document to handle ObjectId and other BSON types
+                        converted_rows = [convert_mongo_document(row) for row in rows]
+                        rows_buffer.extend(converted_rows)
+                    else:
+                        # If rows is a generator or iterator, convert to list first
+                        rows_list = list(rows)
+                        converted_rows = [convert_mongo_document(row) for row in rows_list]
+                        rows_buffer.extend(converted_rows)
+
                     # If the number of rows in the buffer list reaches or exceeds the maximum number of rows, write to the file and clear the buffer list
                     if len(rows_buffer) >= max_line:
                         file_path = os.path.join(table_dir, f"data_{file_index:04d}.parquet")
@@ -130,7 +177,8 @@ def collection_mongo_task(task_uid: str,user_name: str,user_token: str):
                         current_file_row_count += len(rows_buffer)
                         records_count += len(rows_buffer)
                         collection_task.records_count = records_count
-                        insert_datasource_run_task_log_info(task_uid, f"Task with UID {task_uid} get data count {records_count}...")
+                        insert_datasource_run_task_log_info(task_uid,
+                                                            f"Task with UID {task_uid} get data count {records_count}...")
                         db_session.commit()
                         file_index += 1
                         rows_buffer = []  # Clear the buffer list
@@ -143,16 +191,18 @@ def collection_mongo_task(task_uid: str,user_name: str,user_token: str):
                     current_file_row_count += len(rows_buffer)
                     records_count += len(rows_buffer)
                     collection_task.records_count = records_count
-                    insert_datasource_run_task_log_info(task_uid, f"Task with UID {task_uid} get data count {records_count}...")
+                    insert_datasource_run_task_log_info(task_uid,
+                                                        f"Task with UID {task_uid} get data count {records_count}...")
                    db_session.commit()
 
             except Exception as e:
-                insert_datasource_run_task_log_error(task_uid, f"Task with UID {task_uid} failed to get collection document {collection_name}: {e}")
+                insert_datasource_run_task_log_error(task_uid,
+                                                     f"Task with UID {task_uid} failed to get collection document {collection_name}: {e}")
         collection_task.records_count = total_count
         collection_task.total_count = total_count
         db_session.commit()
         upload_to_csg_hub_server(csg_hub_dataset_id,
-                                 csg_hub_dataset_default_branch,
+                                 csg_hub_dataset_name,
                                  user_name, user_token, db_session,
                                  collection_task, datasource_temp_parquet_dir,
                                  datasource_csg_hub_server_dir)
@@ -176,15 +226,15 @@ def collection_mongo_task(task_uid: str,user_name: str,user_token: str):
     return True
 
 
-def upload_to_csg_hub_server(csg_hub_dataset_id: int,
+def upload_to_csg_hub_server(csg_hub_dataset_id: str,
                              csg_hub_dataset_default_branch: str,
-                             user_name: str,user_token: str,db_session: Session,
-                             collection_task: CollectionTask,datasource_temp_json_dir: str,
+                             user_name: str, user_token: str, db_session: Session,
+                             collection_task: CollectionTask, datasource_temp_json_dir: str,
                              datasource_csg_hub_server_dir: str):
     """
     Upload to CSG Hub server
     Args:
-        csg_hub_dataset_id (int): CSG Hub dataset ID
+        csg_hub_dataset_id (str): CSG Hub dataset ID
         csg_hub_dataset_default_branch (str): CSG Hub dataset default branch
         user_name (str): User name
         user_token (str): User token
@@ -198,26 +248,54 @@ def upload_to_csg_hub_server(csg_hub_dataset_id: int,
         # Upload to CSG Hub server
 
         ensure_directory_exists_remove(datasource_csg_hub_server_dir)
-        insert_datasource_run_task_log_info(collection_task.task_uid, f"Starting upload csg hub-server the task[{collection_task.task_uid}]...")
+        insert_datasource_run_task_log_info(collection_task.task_uid,
+                                            f"Starting upload csg hub-server the task[{collection_task.task_uid}]...")
         exporter = load_exporter(
             export_path=datasource_temp_json_dir,
-            repo_id=str(csg_hub_dataset_id),
+            repo_id=csg_hub_dataset_id,
             branch=csg_hub_dataset_default_branch,
             user_name=user_name,
             user_token=user_token,
             work_dir=datasource_csg_hub_server_dir
         )
-        upload_path: Path = Path(datasource_csg_hub_server_dir)
+        upload_path: Path = Path(datasource_temp_json_dir)
+        # Check whether the upload directory exists and is not empty
+        if not os.path.exists(upload_path):
+            insert_datasource_run_task_log_error(collection_task.task_uid,
+                                                 f"the task[{collection_task.task_uid}] upload csg hub-server fail: upload path {upload_path} does not exist")
+            return False
+
+        # List all files in the upload directory for debugging
+        file_list = []
+        for root, dirs, files in os.walk(upload_path):
+            for file in files:
+                file_list.append(os.path.join(root, file))
+        insert_datasource_run_task_log_info(collection_task.task_uid,
+                                            f"Files to upload: {len(file_list)} files found in {upload_path}")
+        if len(file_list) == 0:
+            insert_datasource_run_task_log_error(collection_task.task_uid,
+                                                 f"the task[{collection_task.task_uid}] upload csg hub-server fail: upload path {upload_path} is empty")
+            return False
+
         output_branch_name = exporter.export_from_files(upload_path)
 
         if output_branch_name:
             collection_task.csg_hub_branch = output_branch_name
             db_session.commit()
-            insert_datasource_run_task_log_info(collection_task.task_uid, f"the task[{collection_task.task_uid}] upload csg hub-server success...")
+            insert_datasource_run_task_log_info(collection_task.task_uid,
+                                                f"the task[{collection_task.task_uid}] upload csg hub-server success...")
         else:
-            insert_datasource_run_task_log_error(collection_task.task_uid, f"the task[{collection_task.task_uid}] upload csg hub-server fail...")
+            insert_datasource_run_task_log_error(collection_task.task_uid,
+                                                 f"the task[{collection_task.task_uid}] upload csg hub-server fail: export_from_files returned None")
     except Exception as e:
         logger.error(e)
-        insert_datasource_run_task_log_error(collection_task.task_uid,f"Task UID {collection_task.task_uid} Error occurred while uploading to CSG Hub server: {e}")
+        error_msg = str(e)
+        # Check if this is a "nothing to commit" error
+        if "nothing to commit" in error_msg.lower() or "working tree clean" in error_msg.lower():
+            insert_datasource_run_task_log_error(collection_task.task_uid,
+                                                 f"the task[{collection_task.task_uid}] upload csg hub-server fail: No files to commit. This may happen if: 1) Files are already committed in the branch, 2) Files are ignored by .gitignore, 3) File paths are incorrect. Error: {error_msg}")
+        else:
+            insert_datasource_run_task_log_error(collection_task.task_uid,
                                                 f"Task UID {collection_task.task_uid} Error occurred while uploading to CSG Hub server: {error_msg}")
         return False
     return True
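
For reference, a minimal sketch of what the new convert_mongo_document helper does to a typical MongoDB document before rows are buffered into Parquet (the ObjectId value here is made up for illustration):

    from datetime import datetime
    from bson import ObjectId  # from pymongo's bson package

    doc = {
        "_id": ObjectId("6553f1a2e4b0c1a2b3c4d5e6"),
        "created_at": datetime(2024, 1, 1, 12, 0, 0),
        "tags": ["a", "b"],
        "count": 3,
    }
    # ObjectId -> str, datetime -> ISO string; plain values pass through unchanged
    print(convert_mongo_document(doc))
    # {'_id': '6553f1a2e4b0c1a2b3c4d5e6', 'created_at': '2024-01-01T12:00:00', 'tags': ['a', 'b'], 'count': 3}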

data_server/algo_templates/mapper/algo_template_mapper.py

Lines changed: 2 additions & 1 deletion

@@ -104,7 +104,8 @@ def delete_template_by_id(db: Session, template_id: int, user_id: str) -> bool:
 
     template = db.query(AlgoTemplate).filter(
         AlgoTemplate.id == template_id,
-        AlgoTemplate.user_id == user_id
+        AlgoTemplate.user_id == user_id,
+        AlgoTemplate.buildin == False
     ).first()
 
     if not template:
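
A hedged usage sketch of the changed mapper: with the added buildin filter, a delete request that targets a built-in template finds no matching row and returns False instead of removing it (the session setup and IDs here are illustrative, not taken from the repo):

    # db is a SQLAlchemy Session, e.g. obtained via get_sync_session()
    deleted = delete_template_by_id(db, template_id=42, user_id="user-123")
    if not deleted:
        print("Template missing, owned by someone else, or built-in")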

data_server/algo_templates/utils/parse_algo_dslText.py

Lines changed: 7 additions & 0 deletions

@@ -1,5 +1,6 @@
 import yaml
 from collections import deque, defaultdict
+from loguru import logger
 
 
 def convert_raw_to_processed(raw_yaml: str) -> str:
@@ -27,6 +28,12 @@ def convert_raw_to_processed(raw_yaml: str) -> str:
         source = edge['source']
         target = edge['target']
 
+        if source not in id_to_node:
+            logger.error(f"edges中引用的source节点 '{source}' 不存在于process中!")
+            raise ValueError(f"edges中引用的source节点 '{source}' 不存在于process中!")
+        if target not in id_to_node:
+            logger.error(f"edges中引用的target节点 '{target}' 不存在于process中!")
+            raise ValueError(f"edges中引用的target节点 '{target}' 不存在于process中!")
 
         adj[source].append(target)
         in_degree[target] += 1
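
To illustrate the new validation, a sketch of a raw DSL string whose edge references an undeclared node; the exact top-level schema (process/edges keys) is inferred from the parsing code, and the node ids are invented:

    raw_yaml = """
    process:
      - id: load_1
      - id: clean_1
    edges:
      - source: load_1
        target: clean_2   # not declared under process
    """
    try:
        convert_raw_to_processed(raw_yaml)
    except ValueError as e:
        print(e)  # reports that node 'clean_2' referenced in edges is missing from process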

data_server/api/endpoints/algo_templates.py

Lines changed: 2 additions & 2 deletions

@@ -133,7 +133,7 @@ async def create_algo_template(
 
     except Exception as e:
         logger.error(f"创建算法模板失败: {e}")
-        return response_fail(msg="创建失败")
+        return response_fail(msg="创建模版失败:" + str(e))
     finally:
         db.close()
 
@@ -179,7 +179,7 @@ async def update_algo_template(
 
     except Exception as e:
         logger.error(f"更新算法模板失败: {e}")
-        return response_fail(msg="更新失败")
+        return response_fail(msg="算法更新失败:" + str(e))
     finally:
         db.close()

data_server/api/endpoints/celery_server.py

Lines changed: 25 additions & 1 deletion

@@ -6,7 +6,7 @@
 from loguru import logger
 from data_server.database.session import get_sync_session
 from data_server.schemas.responses import response_success, response_fail
-from data_celery.redis_tools.tools import get_celery_server_list
+from data_celery.redis_tools.tools import get_celery_server_list, del_celery_server_list
 from data_server.database.session import get_celery_worker_redis_db,get_celery_info_details_key
 from data_celery.utils import get_timestamp
 
@@ -56,3 +56,27 @@ async def get_celery_server_list_api(isadmin: Annotated[bool | None, Header(alia
         return response_fail(msg="获取Celery服务器列表失败")
 
 
+@router.delete("/delete_celery_worker/{worker_name}", response_model=dict)
+async def delete_celery_worker_api(worker_name: str, isadmin: Annotated[bool | None, Header(alias="isadmin")] = None):
+    try:
+        if isadmin is None or isadmin == False:
+            return response_fail(msg="Only the admin can delete the Worker")
+
+        worker_name = worker_name.strip()
+
+        server_list = get_celery_server_list()
+        if worker_name not in server_list:
+            return response_fail(msg=f"Worker '{worker_name}' does not exist")
+
+        del_celery_server_list(worker_name)
+
+        celery_redis = get_celery_worker_redis_db()
+        celery_info_details_key = get_celery_info_details_key(worker_name)
+        celery_redis.delete(celery_info_details_key)
+
+        logger.info(f"The administrator manually deleted the worker: {worker_name}")
+        return response_success(msg=f"Successfully deleted worker: {worker_name}")
+    except Exception as e:
+        logger.error(f"delete_celery_worker error: {e}")
+        return response_fail(msg=f"Failed to delete the worker: {str(e)}")
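
A hypothetical client call against the new endpoint; the host/port and router mount point are assumptions, while the /delete_celery_worker/{worker_name} path and the isadmin header come from the diff:

    import httpx

    resp = httpx.delete(
        "http://localhost:8000/delete_celery_worker/worker-node-01",  # assumed base URL
        headers={"isadmin": "true"},  # non-admin callers receive a failure response
    )
    print(resp.json())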

data_server/api/endpoints/jump_to_studio.py

Lines changed: 12 additions & 10 deletions

@@ -4,26 +4,28 @@
 from loguru import logger
 from typing import Optional
 
-from data_server.schemas.responses import response_success
+from data_server.schemas.responses import response_success, response_fail
 
 router = APIRouter()
 
-
 BASE_STUDIO_URL = os.getenv("STUDIO_JUMP_URL", "http://192.168.2.6:8080")
 
 
 @router.post("/jump-to-studio", tags=["studio"])
 async def jump_to_studio(
-    authorization: Optional[str] = Header(None, alias="authorization"),
-    user_token: Optional[str] = Header(None, alias="User-Token"),
-    user_name: Optional[str] = Header(None, alias="User-Name"),
+    authorization: Optional[str] = Header(None, alias="Authorization"),
+    user_token: Optional[str] = Header(None, alias="User-Token"),
+    user_name: Optional[str] = Header(None, alias="User-Name"),
+    user_email: Optional[str] = Header(None, alias="User-Email"),
 ):
     """Jump to studio with credentials from headers."""
 
-    if user_name:
-        target_url = f"{BASE_STUDIO_URL}/user/login_verfy/?email={user_name}@qq.com"
-    else:
-        target_url = f"{BASE_STUDIO_URL}/user/login_verfy/[email protected]"
+    if not user_email:
+        logger.error("Missing User-Email header")
+        return response_fail(msg="缺少 User-Email 请求头")
+
+    email = user_email
+    target_url = f"{BASE_STUDIO_URL}/user/login_verfy/?email={email}"
 
     # Prepare the JSON payload with credentials from headers
     payload = {
@@ -46,7 +48,7 @@ async def jump_to_studio(
         # Send the payload in the JSON body of the POST request
         response = await client.post(target_url, data=payload)
         response.raise_for_status()  # Raise an exception for 4xx/5xx responses
-        target_url = f"{BASE_STUDIO_URL}/user/login_reques/?email={user_name}@qq.com"
+        target_url = f"{BASE_STUDIO_URL}/user/login_reques/?email={email}"
         return response_success(data=target_url)
     except httpx.RequestError as exc:
         logger.error(f"Request to studio failed: {exc}")
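
A sketch of calling the reworked endpoint: the studio email now comes from an explicit User-Email header instead of being synthesized from the user name (base URL and token values here are placeholders):

    import httpx

    resp = httpx.post(
        "http://localhost:8000/jump-to-studio",  # assumed mount point
        headers={
            "Authorization": "Bearer <token>",
            "User-Token": "<token>",
            "User-Name": "alice",
            "User-Email": "alice@example.com",
        },
    )
    print(resp.json())  # on success, data carries the studio login URL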

data_server/api/endpoints/operator_permission.py

Lines changed: 1 addition & 1 deletion

@@ -48,7 +48,7 @@ def create_permission_api(request_data: OperatorPermissionCreateRequest, db: Ses
         if org.path not in existing_org_paths:
             new_permissions_data.append({
                 "operator_id": operator_id,
-                "name": org.name,
+                "name": org.name if org.name is not None else "",
                 "path": org.path,
                 "role_type": 2,
             })

data_server/operator/schemas.py

Lines changed: 1 addition & 1 deletion

@@ -142,7 +142,7 @@ class UserPermission(BaseModel):
     username: Optional[str] = None
 
 class OrgPermission(BaseModel):
-    name: str
+    name: Optional[str] = None
     path: str
 
 class OperatorPermissionCreateRequest(BaseModel):
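
A minimal sketch of why this schema change matters: with name now Optional, a request payload that omits the org name still validates, and create_permission_api substitutes an empty string when persisting:

    org = OrgPermission(path="org/team-a")  # no name supplied
    print(org.name)  # None -> stored as "" by create_permission_api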
