@@ -120,7 +120,9 @@ def test_get_articles(self) -> None:
120120 assert output .records
121121
122122 def test_get_article_attachments (self ) -> None :
123- with HttpMocker () as http_mocker :
123+ with HttpMocker () as http_mocker , patch (
124+ 'airbyte_cdk.sources.declarative.retrievers.file_uploader.noop_file_writer.NoopFileWriter.write' ) as mock_noop_write , patch (
125+ 'airbyte_cdk.sources.declarative.retrievers.file_uploader.local_file_system_file_writer.LocalFileSystemFileWriter.write' ) as mock_file_system_write :
124126 http_mocker .get (
125127 HttpRequest (url = STREAM_URL ),
126128 HttpResponse (json .dumps (find_template ("file_api/articles" , __file__ )), 200 ),
@@ -138,14 +140,21 @@ def test_get_article_attachments(self) -> None:
138140 ),
139141 )
140142
143+ file_size = 12345
144+ mock_file_system_write .return_value = file_size # Simulate a file size
145+
141146 output = read (
142147 self ._config (),
143148 CatalogBuilder ()
144- .with_stream (ConfiguredAirbyteStreamBuilder ().with_name ("article_attachments" ))
149+ .with_stream (ConfiguredAirbyteStreamBuilder ().with_name ("article_attachments" ). with_include_files ( True ) )
145150 .build (),
146151 )
147152
148153 assert output .records
154+ # Ensure that LocalFileSystemFileWriter is called.
155+ mock_file_system_write .assert_called ()
156+ # Ensure that NoopFileWriter is not called.
157+ mock_noop_write .assert_not_called ()
149158 file_reference = output .records [0 ].record .file_reference
150159 assert file_reference
151160 assert file_reference .staging_file_url
@@ -158,7 +167,8 @@ def test_get_article_attachments(self) -> None:
158167 )
159168 assert file_reference .file_size_bytes
160169
161- def test_get_article_attachments_with_filename_extractor (self ) -> None :
170+ def test_get_article_attachments_and_file_is_uploaded (self ) -> None :
171+ """Test that article attachments can be read and the file is uploaded to the staging directory"""
162172 with HttpMocker () as http_mocker :
163173 http_mocker .get (
164174 HttpRequest (url = STREAM_URL ),
@@ -180,12 +190,52 @@ def test_get_article_attachments_with_filename_extractor(self) -> None:
180190 output = read (
181191 self ._config (),
182192 CatalogBuilder ()
183- .with_stream (ConfiguredAirbyteStreamBuilder ().with_name ("article_attachments" ))
193+ .with_stream (ConfiguredAirbyteStreamBuilder ().with_name ("article_attachments" ).with_include_files (True ))
194+ .build (),
195+ yaml_file = "test_file_stream_with_filename_extractor.yaml" ,
196+ )
197+ file_reference = output .records [0 ].record .file_reference
198+ assert file_reference .file_size_bytes
199+ assert Path (file_reference .staging_file_url ).exists (), "File should be uploaded to the staging directory"
200+
201+ def test_get_article_attachments_with_filename_extractor (self ) -> None :
202+ """Test that article attachments can be read with filename extractor and file system writer is called"""
203+ with HttpMocker () as http_mocker , patch (
204+ 'airbyte_cdk.sources.declarative.retrievers.file_uploader.noop_file_writer.NoopFileWriter.write' ) as mock_noop_write , patch (
205+ 'airbyte_cdk.sources.declarative.retrievers.file_uploader.local_file_system_file_writer.LocalFileSystemFileWriter.write' ) as mock_file_system_write :
206+ http_mocker .get (
207+ HttpRequest (url = STREAM_URL ),
208+ HttpResponse (json .dumps (find_template ("file_api/articles" , __file__ )), 200 ),
209+ )
210+ http_mocker .get (
211+ HttpRequest (url = STREAM_ATTACHMENTS_URL ),
212+ HttpResponse (
213+ json .dumps (find_template ("file_api/article_attachments" , __file__ )), 200
214+ ),
215+ )
216+ http_mocker .get (
217+ HttpRequest (url = STREAM_ATTACHMENT_CONTENT_URL ),
218+ HttpResponse (
219+ find_binary_response ("file_api/article_attachment_content.png" , __file__ ), 200
220+ ),
221+ )
222+
223+ file_size = 12345
224+ mock_file_system_write .return_value = file_size # Simulate a file size
225+
226+ output = read (
227+ self ._config (),
228+ CatalogBuilder ()
229+ .with_stream (ConfiguredAirbyteStreamBuilder ().with_name ("article_attachments" ).with_include_files (True ))
184230 .build (),
185231 yaml_file = "test_file_stream_with_filename_extractor.yaml" ,
186232 )
187233
188234 assert len (output .records ) == 1
235+ # Ensure that LocalFileSystemFileWriter is called.
236+ mock_file_system_write .assert_called ()
237+ # Ensure that NoopFileWriter is not called.
238+ mock_noop_write .assert_not_called ()
189239 file_reference = output .records [0 ].record .file_reference
190240 assert file_reference
191241 assert (
@@ -196,10 +246,63 @@ def test_get_article_attachments_with_filename_extractor(self) -> None:
196246 assert not re .match (
197247 r"^article_attachments/[0-9a-fA-F-]{36}$" , file_reference .source_file_relative_path
198248 )
199- assert file_reference .file_size_bytes
249+ assert file_reference .file_size_bytes == file_size
250+
def test_get_article_attachments_without_include_files(self) -> None:
    """Read article attachments with file transfer opted out via the configured catalog.

    Both file writers are patched. With ``include_files`` disabled on the
    configured stream, the test expects ``NoopFileWriter.write`` to be invoked,
    ``LocalFileSystemFileWriter.write`` to stay untouched, and the single
    emitted record to still carry a populated file reference.
    """
    noop_write_target = "airbyte_cdk.sources.declarative.retrievers.file_uploader.noop_file_writer.NoopFileWriter.write"
    local_write_target = "airbyte_cdk.sources.declarative.retrievers.file_uploader.local_file_system_file_writer.LocalFileSystemFileWriter.write"
    include_files = False

    with HttpMocker() as http_mocker, patch(
        noop_write_target
    ) as mock_noop_write, patch(local_write_target) as mock_file_system_write:
        # The patched no-op writer reports the sentinel size it would normally return.
        mock_noop_write.return_value = NoopFileWriter.NOOP_FILE_SIZE

        # Parent stream: the list of articles.
        articles_body = json.dumps(find_template("file_api/articles", __file__))
        http_mocker.get(HttpRequest(url=STREAM_URL), HttpResponse(articles_body, 200))

        # Child stream: the attachments of the article.
        attachments_body = json.dumps(find_template("file_api/article_attachments", __file__))
        http_mocker.get(
            HttpRequest(url=STREAM_ATTACHMENTS_URL),
            HttpResponse(attachments_body, 200),
        )

        # Binary content of the attachment itself.
        attachment_content = find_binary_response(
            "file_api/article_attachment_content.png", __file__
        )
        http_mocker.get(
            HttpRequest(url=STREAM_ATTACHMENT_CONTENT_URL),
            HttpResponse(attachment_content, 200),
        )

        configured_stream = (
            ConfiguredAirbyteStreamBuilder()
            .with_name("article_attachments")
            .with_include_files(include_files)
        )
        catalog = CatalogBuilder().with_stream(configured_stream).build()
        output = read(
            self._config(),
            catalog,
            yaml_file="test_file_stream_with_filename_extractor.yaml",
        )

        assert len(output.records) == 1
        # The real file-system writer must be bypassed when include_files is False...
        mock_file_system_write.assert_not_called()
        # ...and the no-op writer must have been used in its place.
        mock_noop_write.assert_called()

        reference = output.records[0].record.file_reference
        assert reference
        expected_staging_url = (
            "/tmp/airbyte-file-transfer/article_attachments/12138758717583/some_image_name.png"
        )
        assert reference.staging_file_url == expected_staging_url

        relative_path = reference.source_file_relative_path
        assert relative_path
        # The filename extractor is in use, so the path must not be the UUID-style fallback.
        assert not re.match(r"^article_attachments/[0-9a-fA-F-]{36}$", relative_path)
        assert reference.file_size_bytes == NoopFileWriter.NOOP_FILE_SIZE
200301
201302 def test_get_article_attachments_messages_for_connector_builder (self ) -> None :
202- with HttpMocker () as http_mocker :
303+ with HttpMocker () as http_mocker , patch (
304+ 'airbyte_cdk.sources.declarative.retrievers.file_uploader.noop_file_writer.NoopFileWriter.write' ) as mock_noop_write , patch (
305+ 'airbyte_cdk.sources.declarative.retrievers.file_uploader.local_file_system_file_writer.LocalFileSystemFileWriter.write' ) as mock_file_system_write :
203306 http_mocker .get (
204307 HttpRequest (url = STREAM_URL ),
205308 HttpResponse (json .dumps (find_template ("file_api/articles" , __file__ )), 200 ),
@@ -217,6 +320,9 @@ def test_get_article_attachments_messages_for_connector_builder(self) -> None:
217320 ),
218321 )
219322
323+ file_size = NoopFileWriter .NOOP_FILE_SIZE
324+ mock_noop_write .return_value = file_size # Simulate a file size
325+
220326 # Define a mock factory that forces emit_connector_builder_messages=True
221327 class MockModelToComponentFactory (OriginalModelToComponentFactory ):
222328 def __init__ (self , * args , ** kwargs ):
@@ -231,12 +337,16 @@ def __init__(self, *args, **kwargs):
231337 output = read (
232338 self ._config (),
233339 CatalogBuilder ()
234- .with_stream (ConfiguredAirbyteStreamBuilder ().with_name ("article_attachments" ))
340+ .with_stream (ConfiguredAirbyteStreamBuilder ().with_name ("article_attachments" ). with_include_files ( True ) )
235341 .build (),
236342 yaml_file = "test_file_stream_with_filename_extractor.yaml" ,
237343 )
238344
239345 assert len (output .records ) == 1
346+ # Ensure that NoopFileWriter is called.
347+ mock_noop_write .assert_called ()
348+ # Ensure that LocalFileSystemFileWriter is not called.
349+ mock_file_system_write .assert_not_called ()
240350 file_reference = output .records [0 ].record .file_reference
241351 assert file_reference
242352 assert file_reference .staging_file_url
0 commit comments