|
11 | 11 | from data_server.datasource.services.datasource import get_datasource_connector |
12 | 12 | from data_celery.mongo_tools.tools import insert_datasource_run_task_log_info, insert_datasource_run_task_log_error |
13 | 13 | from data_engine.exporter.load import load_exporter |
14 | | -from pathlib import Path |
15 | 14 | import pandas as pd |
16 | 15 | from loguru import logger |
17 | 16 |
|
@@ -256,46 +255,21 @@ def upload_to_csg_hub_server(csg_hub_dataset_id: str, |
256 | 255 | branch=csg_hub_dataset_default_branch, |
257 | 256 | user_name=user_name, |
258 | 257 | user_token=user_token, |
259 | | - work_dir=datasource_csg_hub_server_dir |
| 258 | + work_dir=datasource_csg_hub_server_dir, |
| 259 | + path_is_dir=True |
260 | 260 | ) |
261 | | - upload_path: Path = Path(datasource_temp_json_dir) |
262 | | - # Check whether the uploaded directory exists and is not empty |
263 | | - if not os.path.exists(upload_path): |
264 | | - insert_datasource_run_task_log_error(collection_task.task_uid, |
265 | | - f"the task[{collection_task.task_uid}] upload csg hub-server fail: upload path {upload_path} does not exist") |
266 | | - return False |
267 | | - |
268 | | - # List all files in the upload directory for debugging |
269 | | - file_list = [] |
270 | | - for root, dirs, files in os.walk(upload_path): |
271 | | - for file in files: |
272 | | - file_list.append(os.path.join(root, file)) |
273 | | - insert_datasource_run_task_log_info(collection_task.task_uid, |
274 | | - f"Files to upload: {len(file_list)} files found in {upload_path}") |
275 | | - if len(file_list) == 0: |
276 | | - insert_datasource_run_task_log_error(collection_task.task_uid, |
277 | | - f"the task[{collection_task.task_uid}] upload csg hub-server fail: upload path {upload_path} is empty") |
278 | | - return False |
279 | | - |
280 | | - output_branch_name = exporter.export_from_files(upload_path) |
281 | | - |
282 | | - if output_branch_name: |
283 | | - collection_task.csg_hub_branch = output_branch_name |
| 261 | + exporter.export_large_folder() |
| 262 | + if csg_hub_dataset_default_branch: |
| 263 | + collection_task.csg_hub_branch = csg_hub_dataset_default_branch |
284 | 264 | db_session.commit() |
285 | 265 | insert_datasource_run_task_log_info(collection_task.task_uid, |
286 | 266 | f"the task[{collection_task.task_uid}] upload csg hub-server success...") |
287 | 267 | else: |
288 | 268 | insert_datasource_run_task_log_error(collection_task.task_uid, |
289 | | - f"the task[{collection_task.task_uid}] upload csg hub-server fail: export_from_files returned None") |
| 269 | + f"the task[{collection_task.task_uid}] upload csg hub-server fail...") |
290 | 270 | except Exception as e: |
291 | 271 | logger.error(e) |
292 | | - error_msg = str(e) |
293 | | - # Check if this is a "nothing to commit" error |
294 | | - if "nothing to commit" in error_msg.lower() or "working tree clean" in error_msg.lower(): |
295 | | - insert_datasource_run_task_log_error(collection_task.task_uid, |
296 | | - f"the task[{collection_task.task_uid}] upload csg hub-server fail: No files to commit. This may happen if: 1) Files are already committed in the branch, 2) Files are ignored by .gitignore, 3) File paths are incorrect. Error: {error_msg}") |
297 | | - else: |
298 | | - insert_datasource_run_task_log_error(collection_task.task_uid, |
299 | | - f"Task UID {collection_task.task_uid} Error occurred while uploading to CSG Hub server: {error_msg}") |
| 272 | + insert_datasource_run_task_log_error(collection_task.task_uid, |
| 273 | + f"Task UID {collection_task.task_uid} Error occurred while uploading to CSG Hub server: {e}") |
300 | 274 | return False |
301 | 275 | return True |
0 commit comments