Add a few more tests, apply formatting, and restore accidentally removed tests

brianjlai · brianjlai · commit 9fe52051abfd · 2025-07-31T23:35:11.000-07:00
diff --git a/unit_tests/sources/declarative/decoders/test_decoders_memory_usage.py b/unit_tests/sources/declarative/decoders/test_decoders_memory_usage.py
@@ -31,71 +31,71 @@ def large_event_response_fixture():
     os.remove(file_path)
 
 
-# @pytest.mark.slow
-# @pytest.mark.limit_memory("20 MB")
-# @pytest.mark.parametrize(
-#     "decoder_yaml_definition",
-#     [
-#         "type: JsonlDecoder",
-#     ],
-# )
-# def test_jsonl_decoder_memory_usage(
-#     requests_mock, large_events_response, decoder_yaml_definition: str
-# ):
-#     #
-#     lines_in_response, file_path = large_events_response
-#     content = f"""
-#     name: users
-#     type: DeclarativeStream
-#     retriever:
-#       type: SimpleRetriever
-#       decoder:
-#         {decoder_yaml_definition}
-#       paginator:
-#         type: "NoPagination"
-#       requester:
-#         path: "users/{{{{ stream_slice.slice }}}}"
-#         type: HttpRequester
-#         url_base: "https://for-all-mankind.nasa.com/api/v1"
-#         http_method: GET
-#         authenticator:
-#           type: NoAuth
-#         request_headers: {{}}
-#         request_body_json: {{}}
-#       record_selector:
-#         type: RecordSelector
-#         extractor:
-#           type: DpathExtractor
-#           field_path: []
-#       partition_router:
-#         type: ListPartitionRouter
-#         cursor_field: "slice"
-#         values:
-#           - users1
-#           - users2
-#           - users3
-#           - users4
-#     primary_key: []
-#         """
-#
-#     factory = ModelToComponentFactory()
-#     stream_manifest = YamlDeclarativeSource._parse(content)
-#     stream = factory.create_component(
-#         model_type=DeclarativeStreamModel, component_definition=stream_manifest, config={}
-#     )
-#
-#     def get_body():
-#         return open(file_path, "rb", buffering=30)
-#
-#     counter = 0
-#     requests_mock.get("https://for-all-mankind.nasa.com/api/v1/users/users1", body=get_body())
-#     requests_mock.get("https://for-all-mankind.nasa.com/api/v1/users/users2", body=get_body())
-#     requests_mock.get("https://for-all-mankind.nasa.com/api/v1/users/users3", body=get_body())
-#     requests_mock.get("https://for-all-mankind.nasa.com/api/v1/users/users4", body=get_body())
-#
-#     stream_slices = list(stream.stream_slices(sync_mode=SyncMode.full_refresh))
-#     for stream_slice in stream_slices:
-#         for _ in stream.retriever.read_records(records_schema={}, stream_slice=stream_slice):
-#             counter += 1
-#
-#     assert counter == lines_in_response * len(stream_slices)
+@pytest.mark.slow
+@pytest.mark.limit_memory("20 MB")
+@pytest.mark.parametrize(
+    "decoder_yaml_definition",
+    [
+        "type: JsonlDecoder",
+    ],
+)
+def test_jsonl_decoder_memory_usage(
+    requests_mock, large_events_response, decoder_yaml_definition: str
+):
+    # The fixture supplies the expected line count and the path of the response file.
+    lines_in_response, file_path = large_events_response
+    content = f"""
+    name: users
+    type: DeclarativeStream
+    retriever:
+      type: SimpleRetriever
+      decoder:
+        {decoder_yaml_definition}
+      paginator:
+        type: "NoPagination"
+      requester:
+        path: "users/{{{{ stream_slice.slice }}}}"
+        type: HttpRequester
+        url_base: "https://for-all-mankind.nasa.com/api/v1"
+        http_method: GET
+        authenticator:
+          type: NoAuth
+        request_headers: {{}}
+        request_body_json: {{}}
+      record_selector:
+        type: RecordSelector
+        extractor:
+          type: DpathExtractor
+          field_path: []
+      partition_router:
+        type: ListPartitionRouter
+        cursor_field: "slice"
+        values:
+          - users1
+          - users2
+          - users3
+          - users4
+    primary_key: []
+        """
+
+    factory = ModelToComponentFactory()
+    stream_manifest = YamlDeclarativeSource._parse(content)
+    stream = factory.create_component(
+        model_type=DeclarativeStreamModel, component_definition=stream_manifest, config={}
+    )
+
+    def get_body():
+        return open(file_path, "rb", buffering=30)
+
+    counter = 0
+    requests_mock.get("https://for-all-mankind.nasa.com/api/v1/users/users1", body=get_body())
+    requests_mock.get("https://for-all-mankind.nasa.com/api/v1/users/users2", body=get_body())
+    requests_mock.get("https://for-all-mankind.nasa.com/api/v1/users/users3", body=get_body())
+    requests_mock.get("https://for-all-mankind.nasa.com/api/v1/users/users4", body=get_body())
+
+    stream_slices = list(stream.stream_slices(sync_mode=SyncMode.full_refresh))
+    for stream_slice in stream_slices:
+        for _ in stream.retriever.read_records(records_schema={}, stream_slice=stream_slice):
+            counter += 1
+
+    assert counter == lines_in_response * len(stream_slices)
diff --git a/unit_tests/sources/declarative/extractors/test_response_to_file_extractor.py b/unit_tests/sources/declarative/extractors/test_response_to_file_extractor.py
@@ -72,17 +72,17 @@ def large_event_response_fixture():
     os.remove(file_path)
 
 
-# @pytest.mark.slow
-# @pytest.mark.limit_memory("20 MB")
-# def test_response_to_file_extractor_memory_usage(requests_mock, large_events_response):
-#     lines_in_response, file_path = large_events_response
-#     extractor = ResponseToFileExtractor({})
-#
-#     url = "https://for-all-mankind.nasa.com/api/v1/users/users1"
-#     requests_mock.get(url, body=open(file_path, "rb"))
-#
-#     counter = 0
-#     for _ in extractor.extract_records(requests.get(url, stream=True)):
-#         counter += 1
-#
-#     assert counter == lines_in_response
+@pytest.mark.slow
+@pytest.mark.limit_memory("20 MB")
+def test_response_to_file_extractor_memory_usage(requests_mock, large_events_response):
+    lines_in_response, file_path = large_events_response
+    extractor = ResponseToFileExtractor({})
+
+    url = "https://for-all-mankind.nasa.com/api/v1/users/users1"
+    requests_mock.get(url, body=open(file_path, "rb"))
+
+    counter = 0
+    for _ in extractor.extract_records(requests.get(url, stream=True)):
+        counter += 1
+
+    assert counter == lines_in_response
diff --git a/unit_tests/sources/streams/concurrent/test_partition_reader.py b/unit_tests/sources/streams/concurrent/test_partition_reader.py
@@ -1,6 +1,5 @@
-#
-# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
-#
+# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
+
 import unittest
 from queue import Queue
 from typing import Callable, Iterable, List
@@ -57,7 +56,7 @@ def test_given_read_partition_successful_when_process_partition_then_queue_recor
         cursor.observe.assert_called()
         cursor.close_partition.assert_called_once()
 
-    def test_given_exception_when_process_partition_then_queue_records_and_exception_and_sentinel(
+    def test_given_exception_from_read_when_process_partition_then_queue_records_and_exception_and_sentinel(
         self,
     ):
         partition = Mock()
@@ -73,6 +72,23 @@ def test_given_exception_when_process_partition_then_queue_records_and_exception
             PartitionCompleteSentinel(partition),
         ]
 
+    def test_given_exception_from_close_slice_when_process_partition_then_queue_records_and_exception_and_sentinel(
+        self,
+    ):
+        partition = self._a_partition(_RECORDS)
+        cursor = Mock()
+        exception = ValueError()
+        cursor.close_partition.side_effect = self._close_partition_with_exception(exception)
+        self._partition_reader.process_partition(partition, cursor)
+
+        queue_content = self._consume_queue()
+
+        # Expect 4 queue items: 2 records, 1 StreamThreadException, then 1 PartitionCompleteSentinel.
+        assert len(queue_content) == 4
+        assert queue_content[:2] == _RECORDS
+        assert isinstance(queue_content[2], StreamThreadException)
+        assert queue_content[3] == PartitionCompleteSentinel(partition)
+
     def _a_partition(self, records: List[Record]) -> Partition:
         partition = Mock(spec=Partition)
         partition.read.return_value = iter(records)
@@ -88,6 +104,13 @@ def mocked_function() -> Iterable[Record]:
 
         return mocked_function
 
+    @staticmethod
+    def _close_partition_with_exception(exception: Exception) -> Callable[[Partition], None]:
+        def mocked_function(partition: Partition) -> None:
+            raise exception
+
+        return mocked_function
+
     def _consume_queue(self):
         queue_content = []
         while queue_item := self._queue.get():