99from enum import Enum
1010from io import IOBase
1111from os import makedirs, path
12- from typing import Any, Callable, Iterable, List, MutableMapping, Optional, Set, Tuple, Type
12+ from typing import Any, Iterable, List, MutableMapping, Optional, Set, Tuple
1313
1414from airbyte_protocol_dataclasses.models import FailureType
1515from wcmatch.glob import GLOBSTAR, globmatch
2222 use_file_transfer,
2323)
2424from airbyte_cdk.sources.file_based.exceptions import FileSizeLimitError
25- from airbyte_cdk.sources.file_based.file_based_file_transfer_reader import (
26- AbstractFileBasedFileTransferReader,
27- )
2825from airbyte_cdk.sources.file_based.file_record_data import FileRecordData
29- from airbyte_cdk.sources.file_based.remote_file import RemoteFile
26+ from airbyte_cdk.sources.file_based.remote_file import RemoteFile, UploadableRemoteFile
3027
3128
3229class FileReadMode(Enum):
@@ -40,16 +37,10 @@ class AbstractFileBasedStreamReader(ABC):
4037 FILE_NAME = "file_name"
4138 LOCAL_FILE_PATH = "local_file_path"
4239 FILE_FOLDER = "file_folder"
40+ FILE_SIZE_LIMIT = 1_500_000_000
4341
4442 def __init__(self) -> None:
4543 self._config = None
46- if (
47- self.file_transfer_reader_class is None
48- and type(self).upload is AbstractFileBasedStreamReader.upload
49- ):
50- raise NotImplementedError(
51- "One of file_transfer_reader_class or upload method must be defined to support file transfer."
52- )
5344
5445 @property
5546 def config(self) -> Optional[AbstractFileBasedSpec]:
@@ -156,12 +147,8 @@ def include_identities_stream(self) -> bool:
156147 return include_identities_stream(self.config)
157148 return False
158149
159- @property
160- def file_transfer_reader_class(self) -> Type[AbstractFileBasedFileTransferReader] | None:
161- return None
162-
163150 def upload(
164- self, file: RemoteFile , local_directory: str, logger: logging.Logger
151+ self, file: UploadableRemoteFile , local_directory: str, logger: logging.Logger
165152 ) -> Tuple[FileRecordData, AirbyteRecordMessageFileReference]:
166153 """
167154 This is required for connectors that will support writing to
@@ -179,48 +166,45 @@ def upload(
179166 - file_size_bytes (int): The size of the referenced file in bytes.
180167 - source_file_relative_path (str): The relative path to the referenced file in source.
181168 """
182- if self.file_transfer_reader_class is None:
183- raise NotImplementedError(
184- "file_transfer_reader_class must be defined to support default file transfer upload method."
185- )
169+ if not isinstance(file, UploadableRemoteFile):
170+ raise TypeError(f"Expected UploadableRemoteFile, got {type(file)}")
186171
187- file_transfer = self.file_transfer_reader_class(file)
188- file_size = file_transfer.file_size
172+ file_size = file.size
189173
190- if file_size > file_transfer .FILE_SIZE_LIMIT:
191- message = f"File size exceeds the {file_transfer .FILE_SIZE_LIMIT / 1e9} GB limit."
174+ if file_size > self .FILE_SIZE_LIMIT:
175+ message = f"File size exceeds the {self .FILE_SIZE_LIMIT / 1e9} GB limit."
192176 raise FileSizeLimitError(
193177 message=message, internal_message=message, failure_type=FailureType.config_error
194178 )
195179
196180 file_paths = self._get_file_transfer_paths(
197- source_file_relative_path=file_transfer .source_file_relative_path,
181+ source_file_relative_path=file .source_file_relative_path,
198182 staging_directory=local_directory,
199183 )
200184 local_file_path = file_paths[self.LOCAL_FILE_PATH]
201185 file_relative_path = file_paths[self.FILE_RELATIVE_PATH]
202186 file_name = file_paths[self.FILE_NAME]
203187
204188 logger.info(
205- f"Starting to download the file {file_transfer .file_uri_for_logging} with size: {file_size / (1024 * 1024):,.2f} MB ({file_size / (1024 * 1024 * 1024):.2f} GB)"
189+ f"Starting to download the file {file .file_uri_for_logging} with size: {file_size / (1024 * 1024):,.2f} MB ({file_size / (1024 * 1024 * 1024):.2f} GB)"
206190 )
207191 start_download_time = time.time()
208192
209- file_transfer .download_to_local_directory(local_file_path)
193+ file .download_to_local_directory(local_file_path)
210194
211195 write_duration = time.time() - start_download_time
212196 logger.info(
213- f"Finished downloading the file {file_transfer .file_uri_for_logging} and saved to {local_file_path} in {write_duration:,.2f} seconds."
197+ f"Finished downloading the file {file .file_uri_for_logging} and saved to {local_file_path} in {write_duration:,.2f} seconds."
214198 )
215199
216200 file_record_data = FileRecordData(
217201 folder=file_paths[self.FILE_FOLDER],
218202 file_name=file_name,
219203 bytes=file_size,
220- id=file_transfer.file_id ,
204+ id=file.id ,
221205 mime_type=file.mime_type,
222- created_at=file_transfer.file_created_at ,
223- updated_at=file_transfer.file_updated_at ,
206+ created_at=file.created_at ,
207+ updated_at=file.updated_at ,
224208 source_uri=file.uri,
225209 )
226210 file_reference = AirbyteRecordMessageFileReference(
0 commit comments