5050
5151
5252class GoogleDriveAccessConfig (AccessConfig ):
53- service_account_key : Optional [Annotated [ dict , BeforeValidator ( conform_string_to_dict )]] = Field (
54- default = None , description = "Credentials values to use for authentication"
55- )
53+ service_account_key : Optional [
54+ Annotated [ dict , BeforeValidator ( conform_string_to_dict )]
55+ ] = Field ( default = None , description = "Credentials values to use for authentication" )
5656 service_account_key_path : Optional [Path ] = Field (
57- default = None , description = "File path to credentials values to use for authentication"
57+ default = None ,
58+ description = "File path to credentials values to use for authentication" ,
5859 )
5960
6061 def model_post_init (self , __context : Any ) -> None :
@@ -111,10 +112,9 @@ class GoogleDriveIndexerConfig(IndexerConfig):
111112 extensions : Optional [list [str ]] = None
112113 recursive : bool = False
113114
def model_post_init(self, __context: Any) -> None:
    """Normalise ``extensions`` after the model has been initialised.

    Leading periods are stripped from every configured extension, so
    ".pdf" and "pdf" are stored identically as "pdf".

    NOTE(review): this hook replaces a former ``__post_init__``; the
    enclosing class appears to be a pydantic model, which would call
    ``model_post_init`` rather than the dataclass-style hook - confirm
    against the base class.

    Args:
        __context: Context object supplied by the caller of the hook;
            unused here.
    """
    if self.extensions is not None:
        # str.lstrip removes *all* leading periods, so "..pdf" also
        # normalises to "pdf".
        self.extensions = [e.lstrip(".") for e in self.extensions]
118118
119119
120120@dataclass
@@ -165,10 +165,14 @@ def verify_drive_api_enabled(client) -> None:
165165 Please enable it in the Google Cloud Console."
166166 )
167167 else :
168- raise SourceConnectionError ("Google drive API unreachable for an unknown reason!" )
168+ raise SourceConnectionError (
169+ "Google drive API unreachable for an unknown reason!"
170+ )
169171
170172 @staticmethod
171- def count_files_recursively (files_client , folder_id : str , extensions : list [str ] = None ) -> int :
173+ def count_files_recursively (
174+ files_client , folder_id : str , extensions : list [str ] = None
175+ ) -> int :
172176 """
173177 Count non-folder files recursively under the given folder.
174178 If `extensions` is provided, only count files
@@ -247,7 +251,9 @@ def precheck(self) -> None:
247251 # that the service account has proper permissions."
248252 # )
249253 else :
250- logger .info (f"Found { file_count } files recursively in the folder." )
254+ logger .info (
255+ f"Found { file_count } files recursively in the folder."
256+ )
251257 else :
252258 # Non-recursive: check for at least one immediate non-folder child.
253259 response = client .list (
@@ -275,7 +281,8 @@ def precheck(self) -> None:
275281
276282 except Exception as e :
277283 logger .error (
278- "Failed to validate Google Drive connection during precheck" , exc_info = True
284+ "Failed to validate Google Drive connection during precheck" ,
285+ exc_info = True ,
279286 )
280287 raise SourceConnectionError (f"Precheck failed: { e } " )
281288
@@ -295,7 +302,9 @@ def map_file_data(f: dict) -> FileData:
295302 date_modified_str = f .pop ("modifiedTime" , None )
296303 parent_path = f .pop ("parent_path" , None )
297304 parent_root_path = f .pop ("parent_root_path" , None )
298- date_modified_dt = parser .parse (date_modified_str ) if date_modified_str else None
305+ date_modified_dt = (
306+ parser .parse (date_modified_str ) if date_modified_str else None
307+ )
299308 if (
300309 parent_path
301310 and isinstance (parent_path , str )
@@ -380,7 +389,9 @@ def get_paginated_results(
380389 return files_response
381390
def get_root_info(self, files_client, object_id: str) -> dict:
    """Fetch the metadata record for a single Drive object.

    Args:
        files_client: Client object exposing ``get(fileId=..., fields=...)``
            whose result has an ``execute()`` method.
        object_id: Identifier of the object to look up.

    Returns:
        Whatever ``execute()`` returns for the request; annotated as a dict.
    """
    # The requested fields are sent as one comma-separated string built
    # from ``self.fields``.
    return files_client.get(
        fileId=object_id, fields=",".join(self.fields)
    ).execute()
384395
385396 def get_files (
386397 self ,
@@ -391,7 +402,9 @@ def get_files(
391402 ) -> list [FileData ]:
392403 root_info = self .get_root_info (files_client = files_client , object_id = object_id )
393404 if not self .is_dir (root_info ):
394- root_info ["permissions" ] = self .extract_permissions (root_info .get ("permissions" ))
405+ root_info ["permissions" ] = self .extract_permissions (
406+ root_info .get ("permissions" )
407+ )
395408 data = [self .map_file_data (root_info )]
396409 else :
397410 file_contents = self .get_paginated_results (
@@ -476,13 +489,19 @@ def _get_content(self, downloader: "MediaIoBaseDownload") -> bool:
476489 _ , downloaded = downloader .next_chunk ()
477490 return downloaded
478491
def _write_file(
    self, file_data: FileData, file_contents: io.BytesIO
) -> DownloadResponse:
    """Write an in-memory download to disk and build the download response.

    Args:
        file_data: Record describing the file; used to resolve the
            download path and for the debug log message.
        file_contents: Buffer holding the bytes to write.

    Returns:
        The value produced by ``self.generate_download_response`` for the
        written path.
    """
    download_path = self.get_download_path(file_data=file_data)
    # Make sure the destination directory exists before opening the file.
    download_path.parent.mkdir(parents=True, exist_ok=True)
    logger.debug(
        f"writing {file_data.source_identifiers.fullpath} to {download_path}"
    )
    with open(download_path, "wb") as handler:
        # getbuffer() exposes the buffer's bytes without copying them first.
        handler.write(file_contents.getbuffer())
    return self.generate_download_response(
        file_data=file_data, download_path=download_path
    )
486505
487506 @requires_dependencies (["googleapiclient" ], extras = "google-drive" )
488507 def run (self , file_data : FileData , ** kwargs : Any ) -> DownloadResponse :
0 commit comments