
Commit 12f8675

add new local class to add data like stream_slice
1 parent: 3f42896

6 files changed: +264 -6 lines changed
airbyte_cdk/sources/declarative/request_local/__init__.py (new file)

Lines changed: 3 additions & 0 deletions

@@ -0,0 +1,3 @@
from .request_local import RequestLocal

__all__ = ["RequestLocal"]
airbyte_cdk/sources/declarative/request_local/request_local.py (new file)

Lines changed: 39 additions & 0 deletions

@@ -0,0 +1,39 @@
from threading import local, Lock

class RequestLocal(local):
    _instance = None
    _lock = Lock()  # Thread-safe singleton creation

    def __new__(cls, *args, **kwargs):
        # Use double-checked locking for thread safety
        if cls._instance is None:
            with cls._lock:
                if cls._instance is None:
                    cls._instance = super(RequestLocal, cls).__new__(cls)
        return cls._instance

    def __init__(self):
        # __init__ will be called every time the class is instantiated,
        # but the object itself is only created once by __new__.
        # Use a flag to prevent re-initialization
        if not hasattr(self, '_initialized'):
            self._stream_slice = None  # Initialize _stream_slice
            self._initialized = True

    @property
    def stream_slice(self):
        return self._stream_slice

    @stream_slice.setter
    def stream_slice(self, stream_slice):
        self._stream_slice = stream_slice

    @classmethod
    def get_instance(cls):
        """
        Get the singleton instance of RequestLocal.
        This is the recommended way to get the instance.
        """
        if cls._instance is None:
            cls._instance = cls()
        return cls._instance
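
Taken together, this class is a process-wide singleton whose attribute storage is nevertheless per-thread: __new__ guarantees a single object, while the threading.local base class gives each thread its own _stream_slice (the _initialized flag is per-thread too, which is what re-runs initialization in each new thread). A minimal sketch, not part of the commit, of the intended behavior:

import threading

from airbyte_cdk.sources.declarative.request_local import RequestLocal


def worker() -> None:
    store = RequestLocal()             # the same object as in the main thread
    assert store.stream_slice is None  # but its data starts out empty here
    store.stream_slice = {"slice": "worker"}


main_store = RequestLocal()
main_store.stream_slice = {"slice": "main"}

t = threading.Thread(target=worker)
t.start()
t.join()

# The worker's write stayed in its own thread; the main thread's view is intact.
assert main_store.stream_slice == {"slice": "main"}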

airbyte_cdk/sources/declarative/requesters/http_requester.py

Lines changed: 4 additions & 0 deletions

@@ -31,6 +31,7 @@
     combine_mappings,
     get_interpolation_context,
 )
+from airbyte_cdk.sources.declarative.request_local import RequestLocal


 @dataclass
@@ -449,6 +450,9 @@ def send_request(
         request_body_json: Optional[Mapping[str, Any]] = None,
         log_formatter: Optional[Callable[[requests.Response], Any]] = None,
     ) -> Optional[requests.Response]:
+        request_local = RequestLocal()
+        request_local.stream_slice = stream_slice
+
         request, response = self._http_client.send_request(
             http_method=self.get_method().value,
             url=self._get_url(
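
The effect of this hunk: every send_request call now publishes the slice it was given into the thread-local singleton before the HTTP call is dispatched, so code deeper in the client stack, which never receives stream_slice as an argument, can still read it on the same thread. A hedged sketch of the two sides of the hand-off, with an illustrative slice value:

from airbyte_cdk.sources.declarative.request_local import RequestLocal

# Producer side: what send_request now does before invoking the HTTP client.
RequestLocal().stream_slice = {"start": "2012"}

# Consumer side: what the rate-limiting handlers in the next file do, without
# the slice ever being threaded through their signatures.
current_slice = RequestLocal().stream_slice
assert current_slice == {"start": "2012"}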

airbyte_cdk/sources/streams/http/rate_limiting.py

Lines changed: 11 additions & 4 deletions

@@ -17,6 +17,7 @@
     RateLimitBackoffException,
     UserDefinedBackoffException,
 )
+from airbyte_cdk.sources.declarative.request_local import RequestLocal

 TRANSIENT_EXCEPTIONS = (
     DefaultBackoffException,
@@ -120,8 +121,9 @@ def sleep_on_ratelimit(details: Mapping[str, Any]) -> None:
             logging_message = (
                 f"Retrying. Sleeping for {retry_after} seconds at {ab_datetime_now()} UTC"
             )
-            if stream_slice:
-                logging_message += f" for slice: {stream_slice}"
+            request_local = RequestLocal()
+            if request_local.stream_slice:
+                logging_message += f" for slice: {request_local.stream_slice}"
             logger.info(logging_message)
             time.sleep(retry_after + 1)  # extra second to cover any fractions of second

@@ -156,9 +158,14 @@ def log_retry_attempt(details: Mapping[str, Any]) -> None:
             logger.info(
                 f"Status code: {exc.response.status_code!r}, Response Content: {exc.response.content!r}"
             )
-        logger.info(
-            f"Caught retryable error '{str(exc)}' after {details['tries']} tries. Waiting {details['wait']} seconds then retrying..."
+        logger_slice_info = ""
+        request_local = RequestLocal()
+        if request_local.stream_slice:
+            logger_slice_info = f" for slice: {request_local.stream_slice}"
+        logger_info_message = (
+            f"Caught retryable error '{str(exc)}' after {details['tries']} tries. Waiting {details['wait']} seconds then retrying{logger_slice_info}..."
         )
+        logger.info(logger_info_message)

     return backoff.on_exception(  # type: ignore # Decorator function returns a function with a different signature than the input function, so mypy can't infer the type of the returned function
         backoff.expo,
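
Both handlers run as backoff callbacks, which only receive a details mapping: sleep_on_ratelimit previously depended on a stream_slice from its enclosing scope, and log_retry_attempt had no slice context at all. Reading the slice from RequestLocal means it is available wherever on the thread the retry fires. A minimal standalone sketch of the pattern, assuming only the backoff package and RequestLocal itself (log_attempt and fetch are illustrative names, not CDK code):

import backoff
import requests

from airbyte_cdk.sources.declarative.request_local import RequestLocal


def log_attempt(details) -> None:
    # The backoff `details` mapping cannot carry the slice, so recover it
    # out-of-band from the thread-local singleton.
    slice_info = ""
    current_slice = RequestLocal().stream_slice
    if current_slice:
        slice_info = f" for slice: {current_slice}"
    print(f"Retry {details['tries']}{slice_info}, waiting {details['wait']:.1f} seconds")


@backoff.on_exception(backoff.expo, requests.RequestException, max_tries=3, on_backoff=log_attempt)
def fetch(url: str) -> requests.Response:
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    return response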
Lines changed: 167 additions & 0 deletions

@@ -0,0 +1,167 @@
import threading
import time
from concurrent.futures import ThreadPoolExecutor

from airbyte_cdk.sources.declarative.request_local.request_local import RequestLocal

STREAM_SLICE_KEY = "stream_slice"
INSTANCE_ID_KEY = "instance_id"


def test_basic_singleton():
    """Test basic singleton behavior"""
    # Multiple instantiations return the same instance
    instance1 = RequestLocal()
    instance2 = RequestLocal()
    instance3 = RequestLocal()

    assert instance1 is instance2, "All instances should be the same singleton instance"
    assert instance1 is instance3, "All instances should be the same singleton instance"
    assert instance2 is instance3, "All instances should be the same singleton instance"

    # The get_instance class method returns the same singleton
    instance4 = RequestLocal.get_instance()

    # The stream_slice property is shared through the single instance
    instance1.stream_slice = {"test": "data"}
    assert instance1.stream_slice is instance4.stream_slice
    assert instance2.stream_slice is instance4.stream_slice


def create_instance_in_thread(thread_id, results):
    """Create an instance in a separate thread and record its identity"""
    instance = RequestLocal()

    results[thread_id] = {
        INSTANCE_ID_KEY: id(instance),
        "thread_id": threading.get_ident(),
    }
    time.sleep(0.1)  # Small delay to ensure threads overlap


def test_thread_safety():
    """Ensure that RequestLocal is thread-safe and behaves as a singleton across threads"""
    results = {}
    threads = []
    total_threads = 5

    # Create multiple threads that instantiate RequestLocal
    for i in range(total_threads):
        thread = threading.Thread(target=create_instance_in_thread, args=(i, results))
        threads.append(thread)
        thread.start()

    # Wait for all threads to complete
    for thread in threads:
        thread.join()

    # Analyze results
    instance_ids = [result[INSTANCE_ID_KEY] for result in results.values()]
    unique_ids = set(instance_ids)

    assert len(results) == total_threads, "All threads should have created an instance"
    assert len(unique_ids) == 1, "All threads should see the same singleton instance"


def test_threading_local_behavior():
    """Test how threading.local affects the singleton"""

    def thread_func(thread_name, shared_results, time_sleep):
        instance = RequestLocal()
        assert instance.stream_slice is None, "Initial stream_slice should be empty"
        instance.stream_slice = {f"data_from_{thread_name}": True}

        shared_results[thread_name] = {
            INSTANCE_ID_KEY: id(instance),
            STREAM_SLICE_KEY: instance.stream_slice.copy(),
            "thread_id": threading.get_ident(),
        }

        # Sleep, then re-read: data written by other threads must not be visible
        # here, because RequestLocal stores attributes per thread
        time.sleep(time_sleep)
        shared_results[f"{thread_name}_after_sleep"] = {
            INSTANCE_ID_KEY: id(instance),
            STREAM_SLICE_KEY: instance.stream_slice.copy(),
            "end_time": time.time(),
        }

    results = {}
    threads = {}
    threads_amount = 3
    time_sleep = 0.9
    thread_names = []
    for i in range(threads_amount):
        thread_name = f"thread_{i}"
        thread_names.append(thread_name)
        thread = threading.Thread(target=thread_func, args=(thread_name, results, time_sleep))
        time_sleep /= 3  # Decrease the sleep time for each thread so their lifetimes overlap
        threads[thread_name] = thread
        thread.start()

    for thread in threads.values():
        thread.join()

    end_times = [results[f"{name}_after_sleep"]["end_time"] for name in thread_names]
    last_end_time = end_times.pop()
    while end_times:
        current_end_time = end_times.pop()
        # Each later-created thread sleeps less and must finish before the ones
        # created earlier, which proves the first (longest-sleeping) thread was
        # still alive while the other threads wrote their slices
        assert last_end_time < current_end_time, "Later-created threads should finish first"
        last_end_time = current_end_time

    assert len(thread_names) > 1
    assert len(set(thread_names)) == len(thread_names), "Thread names should be unique"
    for current_name in thread_names:
        after_sleep_name = f"{current_name}_after_sleep"
        assert results[current_name][STREAM_SLICE_KEY] == results[after_sleep_name][STREAM_SLICE_KEY], \
            f"Stream slice should remain consistent across thread {current_name} before and after sleep"
        assert results[current_name][INSTANCE_ID_KEY] == results[after_sleep_name][INSTANCE_ID_KEY], \
            f"Instance ID should remain consistent across thread {current_name} before and after sleep"

        # Stream slices differ across threads even though the instance ID is the same
        for other_name in [name for name in thread_names if name != current_name]:
            assert results[current_name][STREAM_SLICE_KEY] != results[other_name][STREAM_SLICE_KEY], \
                f"Stream slices from different threads should not be the same: {current_name} vs {other_name}"
            assert results[current_name][INSTANCE_ID_KEY] == results[other_name][INSTANCE_ID_KEY]


def test_concurrent_access():
    """Test concurrent access using ThreadPoolExecutor"""

    def worker(worker_id):
        instance = RequestLocal()
        return {
            "worker_id": worker_id,
            INSTANCE_ID_KEY: id(instance),
            "thread_id": threading.get_ident(),
        }

    with ThreadPoolExecutor(max_workers=10) as executor:
        futures = [executor.submit(worker, i) for i in range(20)]
        results = [future.result() for future in futures]

    # Every worker must observe the same singleton instance
    instance_ids = [result[INSTANCE_ID_KEY] for result in results]
    unique_ids = set(instance_ids)

    assert len(results) == 20, "All workers should have produced a result"
    assert len(unique_ids) == 1, "Singleton behavior should be maintained across pool threads"
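
One caveat worth noting when extending these tests: RequestLocal keeps its process-wide _instance for the life of the interpreter, so state set in one test (for example, the stream_slice written in test_basic_singleton) survives into later tests in the same run. A hypothetical autouse fixture, not part of the commit, that would isolate them:

import pytest

from airbyte_cdk.sources.declarative.request_local.request_local import RequestLocal


@pytest.fixture(autouse=True)
def reset_request_local():
    yield
    # Drop the cached singleton so each test starts from a clean instance.
    RequestLocal._instance = None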

unit_tests/sources/declarative/requesters/test_http_requester.py

Lines changed: 40 additions & 2 deletions

@@ -2,7 +2,8 @@
 # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 #

-from datetime import timedelta
+import logging
+
 from typing import Any, Mapping, Optional
 from unittest import mock
 from unittest.mock import MagicMock
@@ -35,7 +36,7 @@
     MovingWindowCallRatePolicy,
     Rate,
 )
-from airbyte_cdk.sources.streams.http.error_handlers.response_models import ResponseAction
+from airbyte_cdk.sources.streams.http.exceptions import RateLimitBackoffException
 from airbyte_cdk.sources.streams.http.exceptions import (
     RequestBodyException,
     UserDefinedBackoffException,
@@ -216,6 +217,30 @@ def create_requester(
     requester._http_client._session.send.return_value = req
     return requester

+def create_requester_rate_limited(
+    url_base: Optional[str] = None,
+    parameters: Optional[Mapping[str, Any]] = {},
+    config: Optional[Config] = None,
+    path: Optional[str] = None,
+    authenticator: Optional[DeclarativeAuthenticator] = None,
+    error_handler: Optional[ErrorHandler] = None,
+) -> HttpRequester:
+    requester = HttpRequester(
+        name="name",
+        url_base=url_base or "https://example.com",
+        path=path or "deals",
+        http_method=HttpMethod.GET,
+        request_options_provider=None,
+        authenticator=authenticator,
+        error_handler=error_handler,
+        config=config or {},
+        parameters=parameters or {},
+    )
+    requester._http_client._session.send = MagicMock()
+    req = requests.Response()
+    req.status_code = 429  # Simulating a rate limit response
+    requester._http_client._session.send.return_value = req
+    return requester

 def test_basic_send_request():
     options_provider = MagicMock()
@@ -229,6 +254,19 @@ def test_basic_send_request():
     assert sent_request.headers["my_header"] == "my_value"
     assert sent_request.body is None

+@pytest.mark.usefixtures("mock_sleep")
+def test_send_request_rate_limited(caplog):
+    options_provider = MagicMock()
+    options_provider.get_request_headers.return_value = {"my_header": "my_value"}
+    requester = create_requester_rate_limited()
+    requester._request_options_provider = options_provider
+    with caplog.at_level(logging.INFO, logger="airbyte"):
+        with pytest.raises(RateLimitBackoffException):
+            requester.send_request(stream_slice={"start": "2012"})
+
+    logged_messages = [record.message for record in caplog.records]
+    assert "Caught retryable error 'Too many requests.' after 1 tries. Waiting 1 seconds then retrying for slice: {'start': '2012'}..." in logged_messages

 @pytest.mark.parametrize(
     "provider_data, provider_json, param_data, param_json, authenticator_data, authenticator_json, expected_exception, expected_body",
