66import requests
77from unstructured_client import UnstructuredClient
88from unstructured_client .models import operations , shared
9+ from unstructured_client .utils import retries
910
1011from unstructured .documents .elements import Element
1112from unstructured .logger import logger
1213from unstructured .partition .common .common import exactly_one
1314from unstructured .staging .base import elements_from_dicts , elements_from_json
1415
# Fallback retry settings, mirroring the defaults found in the client code. They are used only
# when neither the caller nor the SDK configuration supplies a value.
# NOTE(review): the magnitudes look like milliseconds despite the `_SEC` suffix -- confirm
# against the SDK's backoff implementation before relying on the unit.
DEFAULT_RETRIES_INITIAL_INTERVAL_SEC = 3_000
DEFAULT_RETRIES_MAX_INTERVAL_SEC = 720_000
DEFAULT_RETRIES_EXPONENT = 1.5
DEFAULT_RETRIES_MAX_ELAPSED_TIME_SEC = 1_800_000
DEFAULT_RETRIES_CONNECTION_ERRORS = True
22+
1523
1624def partition_via_api (
1725 filename : Optional [str ] = None ,
@@ -21,6 +29,11 @@ def partition_via_api(
2129 api_url : str = "https://api.unstructured.io/general/v0/general" ,
2230 api_key : str = "" ,
2331 metadata_filename : Optional [str ] = None ,
32+ retries_initial_interval : [int ] = None ,
33+ retries_max_interval : Optional [int ] = None ,
34+ retries_exponent : Optional [float ] = None ,
35+ retries_max_elapsed_time : Optional [int ] = None ,
36+ retries_connection_errors : Optional [bool ] = None ,
2437 ** request_kwargs : Any ,
2538) -> list [Element ]:
2639 """Partitions a document using the Unstructured REST API. This is equivalent to
@@ -44,6 +57,21 @@ def partition_via_api(
4457 The URL for the Unstructured API. Defaults to the hosted Unstructured API.
4558 api_key
4659 The API key to pass to the Unstructured API.
60+ retries_initial_interval
61+ Defines the time interval (in seconds) to wait before the first retry in case of a request
62+ failure. Defaults to 3000. If set should be > 0.
63+ retries_max_interval
64+ Defines the maximum time interval (in seconds) to wait between retries (the interval
65+ between retries is increased as using exponential increase algorithm
66+ - this setting limits it). Defaults to 720000. If set should be > 0.
67+ retries_exponent
68+ Defines the exponential factor to increase the interval between retries. Defaults to 1.5.
69+ If set should be > 0.0.
70+ retries_max_elapsed_time
71+ Defines the maximum time (in seconds) to wait for retries. If exceeded, the original
72+ exception is raised. Defaults to 1800000. If set should be > 0.
73+ retries_connection_errors
74+ Defines whether to retry on connection errors. Defaults to True.
4775 request_kwargs
4876 Additional parameters to pass to the data field of the request to the Unstructured API.
4977 For example the `strategy` parameter.
@@ -87,7 +115,19 @@ def partition_via_api(
87115 partition_parameters = shared .PartitionParameters (files = files , ** request_kwargs )
88116 )
89117
90- response = sdk .general .partition (request = req )
118+ retries_config = get_retries_config (
119+ retries_connection_errors = retries_connection_errors ,
120+ retries_exponent = retries_exponent ,
121+ retries_initial_interval = retries_initial_interval ,
122+ retries_max_elapsed_time = retries_max_elapsed_time ,
123+ retries_max_interval = retries_max_interval ,
124+ sdk = sdk ,
125+ )
126+
127+ response = sdk .general .partition (
128+ request = req ,
129+ retries = retries_config ,
130+ )
91131
92132 if response .status_code == 200 :
93133 return elements_from_json (text = response .raw_response .text )
@@ -97,6 +137,97 @@ def partition_via_api(
97137 )
98138
99139
def get_retries_config(
    retries_connection_errors: Optional[bool],
    retries_exponent: Optional[float],
    retries_initial_interval: Optional[int],
    retries_max_elapsed_time: Optional[int],
    retries_max_interval: Optional[int],
    sdk: UnstructuredClient,
) -> Optional[retries.RetryConfig]:
    """Constructs a RetryConfig object from the provided parameters.

    If all parameters are None, returns None (and the SDK-managed defaults are used within the
    client). Otherwise every parameter that was not provided is filled in, first from the retry
    configuration stored on the SDK (when one is set) and then from the module-level
    DEFAULT_RETRIES_* constants.

    The solution is not perfect as the RetryConfig object does not include the defaults by
    itself so we might need to construct it basing on our defaults.

    NOTE(review): the interval values are passed to the SDK unchanged; the magnitude of the
    default constants suggests the SDK expects milliseconds rather than seconds -- confirm.

    Parameters
    ----------
    retries_connection_errors
        Defines whether to retry on connection errors. If not set, the value from the SDK retry
        configuration is used, falling back to the DEFAULT_RETRIES_CONNECTION_ERRORS constant.
    retries_exponent
        Defines the exponential factor to increase the interval between retries.
        If set, should be > 0.0 (otherwise the DEFAULT_RETRIES_EXPONENT constant is used)
    retries_initial_interval
        Defines the time interval to wait before the first retry in case of a request failure.
        If set, should be > 0 (otherwise the DEFAULT_RETRIES_INITIAL_INTERVAL_SEC constant is used)
    retries_max_elapsed_time
        Defines the maximum time to wait for retries. If exceeded, the original exception is raised.
        If set, should be > 0 (otherwise the DEFAULT_RETRIES_MAX_ELAPSED_TIME_SEC constant is used)
    retries_max_interval
        Defines the maximum time interval to wait between retries. If set, should be > 0
        (otherwise the DEFAULT_RETRIES_MAX_INTERVAL_SEC constant is used)
    sdk
        The UnstructuredClient object to take the default values from.

    Returns
    -------
    A RetryConfig using the "backoff" strategy, or None when no retry parameter was provided.
    """
    sdk_default_retries_config = sdk.sdk_configuration.retry_config

    overrides = (
        retries_initial_interval,
        retries_max_interval,
        retries_exponent,
        retries_max_elapsed_time,
        retries_connection_errors,
    )
    if all(setting is None for setting in overrides):
        # Nothing was customised: let the client apply its own retry defaults.
        return None

    # The SDK retry configuration may be unset (falsy); only read attributes from it when it is
    # actually present, otherwise fall through to the module-level defaults.
    sdk_backoff = (
        getattr(sdk_default_retries_config, "backoff", None)
        if sdk_default_retries_config
        else None
    )
    sdk_retry_connection_errors = (
        getattr(sdk_default_retries_config, "retry_connection_errors", None)
        if sdk_default_retries_config
        else None
    )

    def get_backoff_default(setting_name: str, default_value: Any) -> Any:
        """Return the SDK-configured backoff value, or `default_value` when missing/falsy."""
        if sdk_backoff is not None:
            setting_value = getattr(sdk_backoff, setting_name, None)
            if setting_value:
                return setting_value
        return default_value

    default_retries_connection_errors = (
        sdk_retry_connection_errors
        if sdk_retry_connection_errors is not None
        else DEFAULT_RETRIES_CONNECTION_ERRORS
    )

    # `or` is intentional: a value of 0 is not a valid setting (each must be > 0), so it is
    # treated the same as "not provided" and replaced by the default.
    backoff_strategy = retries.BackoffStrategy(
        initial_interval=(
            retries_initial_interval
            or get_backoff_default("initial_interval", DEFAULT_RETRIES_INITIAL_INTERVAL_SEC)
        ),
        max_interval=(
            retries_max_interval
            or get_backoff_default("max_interval", DEFAULT_RETRIES_MAX_INTERVAL_SEC)
        ),
        exponent=(
            retries_exponent or get_backoff_default("exponent", DEFAULT_RETRIES_EXPONENT)
        ),
        max_elapsed_time=(
            retries_max_elapsed_time
            or get_backoff_default("max_elapsed_time", DEFAULT_RETRIES_MAX_ELAPSED_TIME_SEC)
        ),
    )
    return retries.RetryConfig(
        strategy="backoff",
        backoff=backoff_strategy,
        # False is a legitimate explicit choice here, hence the `is not None` test.
        retry_connection_errors=(
            retries_connection_errors
            if retries_connection_errors is not None
            else default_retries_connection_errors
        ),
    )
229+
230+
100231def partition_multiple_via_api (
101232 filenames : Optional [list [str ]] = None ,
102233 content_types : Optional [list [str ]] = None ,
0 commit comments