1313from dataclasses import dataclass
1414from typing import Any
1515
16- from unstract .core .data_models import ConnectionType as CoreConnectionType
16+ from shared .infrastructure .logging .logger import WorkerLogger
17+ from shared .workflow .logger_helper import WorkflowLoggerHelper
1718
18- from .. infrastructure . logging . logger import WorkerLogger
19+ from unstract . core . data_models import ConnectionType as CoreConnectionType
1920
2021logger = WorkerLogger .get_logger (__name__ )
2122
@@ -94,6 +95,7 @@ def __init__(self, config: SourceConfig, workflow_log=None):
9495 self .connection_type = config .connection_type
9596 self .settings = config .settings
9697 self .workflow_log = workflow_log
98+ self .logger_helper = WorkflowLoggerHelper (workflow_log )
9799
98100 # Store connector instance details
99101 self .connector_id = config .connector_id
@@ -110,27 +112,39 @@ def get_fsspec_fs(self):
110112
111113 This method replicates backend logic for getting filesystem access.
112114 """
113- if self .connection_type == self .ConnectionType .API_STORAGE :
114- # API storage uses workflow execution storage
115- from unstract .filesystem import FileStorageType , FileSystem
115+ try :
116+ if self .connection_type == self .ConnectionType .API_STORAGE :
117+ # API storage uses workflow execution storage
118+ from unstract .filesystem import FileStorageType , FileSystem
119+
120+ file_system = FileSystem (FileStorageType .WORKFLOW_EXECUTION )
121+ return file_system .get_file_storage ()
116122
117- file_system = FileSystem (FileStorageType .WORKFLOW_EXECUTION )
118- return file_system .get_file_storage ()
123+ if not self .connector_id or not self .connector_settings :
124+ error_msg = (
125+ "Source connector not configured - missing connector_id or settings"
126+ )
127+ self .logger_helper .log_error (logger , error_msg )
128+ raise Exception (error_msg )
119129
120- if not self . connector_id or not self . connector_settings :
121- raise Exception ( "Source connector not configured" )
130+ # Get the connector instance using connectorkit
131+ from unstract . connectors . connectorkit import Connectorkit
122132
123- # Get the connector instance using connectorkit
124- from unstract .connectors .connectorkit import Connectorkit
133+ connectorkit = Connectorkit ()
134+ connector_class = connectorkit .get_connector_class_by_connector_id (
135+ self .connector_id
136+ )
137+ connector_instance = connector_class (self .connector_settings )
125138
126- connectorkit = Connectorkit ()
127- connector_class = connectorkit .get_connector_class_by_connector_id (
128- self .connector_id
129- )
130- connector_instance = connector_class (self .connector_settings )
139+ # Get fsspec filesystem
140+ fs = connector_instance .get_fsspec_fs ()
141+ return fs
131142
132- # Get fsspec filesystem
133- return connector_instance .get_fsspec_fs ()
143+ except Exception as e :
144+ error_msg = f"Failed to initialize source connector filesystem: { str (e )} "
145+ self .logger_helper .log_error (logger , error_msg )
146+ logger .error (error_msg )
147+ raise
134148
135149 def read_file_content (self , file_path : str ) -> bytes :
136150 """Read file content from source connector.
@@ -164,7 +178,6 @@ def list_files(
164178 List of file information dictionaries
165179 """
166180 fs = self .get_fsspec_fs ()
167-
168181 # Implementation would list files using fsspec
169182 # This is a simplified version
170183 try :
@@ -186,7 +199,9 @@ def list_files(
186199
187200 return files
188201 except Exception as e :
189- logger .error (f"Failed to list files from source: { e } " )
202+ error_msg = f"Failed to list files from source connector directory '{ input_directory } ': { str (e )} "
203+ self .logger_helper .log_error (logger , error_msg )
204+ logger .error (error_msg )
190205 return []
191206
192207 def validate (self ) -> None :
@@ -198,11 +213,15 @@ def validate(self) -> None:
198213 self .ConnectionType .API ,
199214 self .ConnectionType .API_STORAGE ,
200215 ]:
201- raise Exception (f"Invalid source connection type: { connection_type } " )
216+ error_msg = f"Invalid source connection type: { connection_type } "
217+ self .logger_helper .log_error (logger , error_msg )
218+ raise Exception (error_msg )
202219
203220 if connection_type == self .ConnectionType .FILESYSTEM :
204221 if not self .connector_id or not self .connector_settings :
205- raise Exception ("Filesystem source requires connector configuration" )
222+ error_msg = "Filesystem source requires connector configuration"
223+ self .logger_helper .log_error (logger , error_msg )
224+ raise Exception (error_msg )
206225
207226 def get_config (self ) -> SourceConfig :
208227 """Get serializable configuration for the source connector."""
0 commit comments