1212 ensure_directory_exists_remove , get_datasource_csg_hub_server_dir )
1313from data_celery .mongo_tools .tools import insert_datasource_run_task_log_info ,insert_datasource_run_task_log_error
1414from data_server .datasource .services .datasource import get_datasource_connector
15+ from data_server .datasource .schemas import DataSourceCreate
1516from data_engine .exporter .load import load_exporter
1617from pathlib import Path
1718import pandas as pd
@@ -103,13 +104,32 @@ def collection_hive_task(task_uid: str,user_name: str,user_token: str):
103104 max_line = extra_config ["max_line_json" ]
104105 if use_type == "sql" :
105106 if use_sql :
106- connector = get_datasource_connector (collection_task .datasource )
107- if not connector .test_connection ():
107+ try :
108+ # Convert the database object into a DataSourceCreate object
109+ datasource_create = DataSourceCreate (
110+ name = collection_task .datasource .name ,
111+ des = collection_task .datasource .des ,
112+ source_type = collection_task .datasource .source_type ,
113+ host = collection_task .datasource .host ,
114+ port = collection_task .datasource .port ,
115+ username = collection_task .datasource .username ,
116+ password = collection_task .datasource .password ,
117+ database = collection_task .datasource .database ,
118+ auth_type = collection_task .datasource .auth_type
119+ )
120+ connector = get_datasource_connector (datasource_create )
121+ test_result = connector .test_connection ()
122+ if not test_result or not test_result .get ("success" , False ):
123+ collection_task .task_status = DataSourceTaskStatusEnum .ERROR .value
124+ error_msg = test_result .get ("message" , "Connection failed" ) if test_result else "Connection test returned None"
125+ insert_datasource_run_task_log_error (task_uid , f"Task with UID { task_uid } failed to connect to the database: { error_msg } " )
126+ return False
127+ get_table_dataset_by_sql (connector , task_uid , use_sql , db_session , collection_task ,
128+ datasource_temp_parquet_dir , max_line = max_line )
129+ except Exception as e :
108130 collection_task .task_status = DataSourceTaskStatusEnum .ERROR .value
109- insert_datasource_run_task_log_error (task_uid , f"Task with UID { task_uid } failed to connect to the database. " )
131+ insert_datasource_run_task_log_error (task_uid , f"Error occurred while executing the task: { str ( e ) } " )
110132 return False
111- get_table_dataset_by_sql (connector , task_uid , use_sql , db_session , collection_task ,
112- datasource_temp_parquet_dir , max_line = max_line )
113133 upload_path = datasource_temp_parquet_dir .join ('run_sql' )
114134 upload_to_csg_hub_server (csg_hub_dataset_id ,
115135 csg_hub_dataset_name ,
@@ -125,14 +145,34 @@ def collection_hive_task(task_uid: str,user_name: str,user_token: str):
125145 source = hive_config ["source" ]
126146 total_count = 0
127147 records_count = 0
128- connector = get_datasource_connector (collection_task .datasource )
129- if not connector .test_connection ():
148+ try :
149+ # Convert the database object into a DataSourceCreate object
150+ datasource_create = DataSourceCreate (
151+ name = collection_task .datasource .name ,
152+ des = collection_task .datasource .des ,
153+ source_type = collection_task .datasource .source_type ,
154+ host = collection_task .datasource .host ,
155+ port = collection_task .datasource .port ,
156+ username = collection_task .datasource .username ,
157+ password = collection_task .datasource .password ,
158+ database = collection_task .datasource .database ,
159+ auth_type = collection_task .datasource .auth_type
160+ )
161+ connector = get_datasource_connector (datasource_create )
162+ test_result = connector .test_connection ()
163+ if not test_result or not test_result .get ("success" , False ):
164+ collection_task .task_status = DataSourceTaskStatusEnum .ERROR .value
165+ error_msg = test_result .get ("message" , "Connection failed" ) if test_result else "Connection test returned None"
166+ insert_datasource_run_task_log_error (task_uid , f"Task with UID { task_uid } failed to connect to the database: { error_msg } " )
167+ return False
168+ for table_name in source .keys ():
169+ table_total = connector .get_table_total_count_hive (table_name )
170+ total_count += table_total
171+ except Exception as e :
130172 collection_task .task_status = DataSourceTaskStatusEnum .ERROR .value
131- insert_datasource_run_task_log_error (task_uid , f"Task with UID { task_uid } failed to connect to the database. " )
173+ insert_datasource_run_task_log_error (task_uid , f"Error occurred while executing the task: { str ( e ) } " )
132174 return False
133- for table_name in source .keys ():
134- table_total = connector .get_table_total_count_hive (table_name )
135- total_count += table_total
175+
136176 collection_task .total_count = total_count
137177 collection_task .records_count = records_count
138178 db_session .commit ()
@@ -165,8 +205,14 @@ def collection_hive_task(task_uid: str,user_name: str,user_token: str):
165205 except Exception as e :
166206 if collection_task :
167207 collection_task .task_status = DataSourceTaskStatusEnum .ERROR .value
208+ error_type = type (e ).__name__
209+ error_msg = str (e )
210+ error_traceback = traceback .format_exc ()
211+ logger .error (f"Task { task_uid } error: { error_type } : { error_msg } " )
212+ logger .error (f"Full traceback:\n { error_traceback } " )
168213 traceback .print_exc ()
169- insert_datasource_run_task_log_error (task_uid , f"Error occurred while executing the task: { e } " )
214+ insert_datasource_run_task_log_error (task_uid , f"Error occurred while executing the task: { error_type } : { error_msg } " )
215+ insert_datasource_run_task_log_error (task_uid , f"Traceback: { error_traceback } " )
170216 return False
171217 finally :
172218 if collection_task :
0 commit comments