1717import urllib3 .exceptions
1818
1919import databricks .sql .auth .thrift_http_client
20+ from databricks .sql .auth .thrift_http_client import CommandType
2021from databricks .sql .auth .authenticators import AuthProvider
2122from databricks .sql .thrift_api .TCLIService import TCLIService , ttypes
2223from databricks .sql import *
24+ from databricks .sql .exc import MaxRetryDurationError
2325from databricks .sql .thrift_api .TCLIService .TCLIService import (
2426 Client as TCLIServiceClient ,
2527)
@@ -70,6 +72,12 @@ class ThriftBackend:
7072 CLOSED_OP_STATE = ttypes .TOperationState .CLOSED_STATE
7173 ERROR_OP_STATE = ttypes .TOperationState .ERROR_STATE
7274
75+ _retry_delay_min : float
76+ _retry_delay_max : float
77+ _retry_stop_after_attempts_count : int
78+ _retry_stop_after_attempts_duration : float
79+ _retry_delay_default : float
80+
7381 def __init__ (
7482 self ,
7583 server_hostname : str ,
@@ -113,9 +121,15 @@ def __init__(
113121 #
114122 # _retry_stop_after_attempts_count
115123 # The maximum number of times we should retry retryable requests (defaults to 24)
124+ # _retry_dangerous_codes
125+ # An iterable of integer HTTP status codes. ExecuteStatement commands will be retried if these codes are received.
126+ # (defaults to [])
116127 # _socket_timeout
117128 # The timeout in seconds for socket send, recv and connect operations. Should be a positive float or integer.
118129 # (defaults to 900)
130+ # _enable_v3_retries
131+ # Whether to use the urllib3-based DatabricksRetryPolicy (defined in databricks.sql.auth.thrift_http_client)
132+ # (defaults to False)
119133 # max_download_threads
120134 # Number of threads for handling cloud fetch downloads. Defaults to 10
121135
@@ -166,10 +180,28 @@ def __init__(
166180
167181 self ._auth_provider = auth_provider
168182
183+ # Connector version 3 retry approach
184+ self .enable_v3_retries = kwargs .get ("_enable_v3_retries" , False )
185+ self .force_dangerous_codes = kwargs .get ("_retry_dangerous_codes" , [])
186+
187+ additional_transport_args = {}
188+ if self .enable_v3_retries :
189+ self .retry_policy = databricks .sql .auth .thrift_http_client .DatabricksRetryPolicy (
190+ delay_min = self ._retry_delay_min ,
191+ delay_max = self ._retry_delay_max ,
192+ stop_after_attempts_count = self ._retry_stop_after_attempts_count ,
193+ stop_after_attempts_duration = self ._retry_stop_after_attempts_duration ,
194+ delay_default = self ._retry_delay_default ,
195+ force_dangerous_codes = self .force_dangerous_codes ,
196+ )
197+
198+ additional_transport_args ["retry_policy" ] = self .retry_policy
199+
169200 self ._transport = databricks .sql .auth .thrift_http_client .THttpClient (
170201 auth_provider = self ._auth_provider ,
171202 uri_or_host = uri ,
172203 ssl_context = ssl_context ,
204+ ** additional_transport_args , # type: ignore
173205 )
174206
175207 timeout = kwargs .get ("_socket_timeout" , DEFAULT_SOCKET_TIMEOUT )
@@ -188,6 +220,7 @@ def __init__(
188220
189221 self ._request_lock = threading .RLock ()
190222
223+ # TODO: Move this bounding logic into DatabricksRetryPolicy for v3 (PECO-918)
191224 def _initialize_retry_args (self , kwargs ):
192225 # Configure retries & timing: use user-settings or defaults, and bound
193226 # by policy. Log.warn when given param gets restricted.
@@ -335,12 +368,17 @@ def attempt_request(attempt):
335368
336369 error , error_message , retry_delay = None , None , None
337370 try :
338- logger .debug (
339- "Sending request: {}(<REDACTED>)" .format (
340- getattr (method , "__name__" )
341- )
342- )
371+
372+ this_method_name = getattr (method , "__name__" )
373+
374+ logger .debug ("Sending request: {}(<REDACTED>)" .format (this_method_name ))
343375 unsafe_logger .debug ("Sending request: {}" .format (request ))
376+
377+ # These three lines are no-ops if the v3 retry policy is not in use
378+ this_command_type = CommandType .get (this_method_name )
379+ self ._transport .set_retry_command_type (this_command_type )
380+ self ._transport .startRetryTimer ()
381+
344382 response = method (request )
345383
346384 # Calling `close()` here releases the active HTTP connection back to the pool
@@ -356,9 +394,16 @@ def attempt_request(attempt):
356394 except urllib3 .exceptions .HTTPError as err :
357395 # Retry on timeout. This happens a lot in Azure, and retrying is safe because no data has been sent to the server yet.
358396
397+ # TODO: don't use exception handling for GetOperationStatus (GOS) polling...
398+
359399 gos_name = TCLIServiceClient .GetOperationStatus .__name__
360400 if method .__name__ == gos_name :
361- retry_delay = bound_retry_delay (attempt , self ._retry_delay_default )
401+ delay_default = (
402+ self .enable_v3_retries
403+ and self .retry_policy .delay_default
404+ or self ._retry_delay_default
405+ )
406+ retry_delay = bound_retry_delay (attempt , delay_default )
362407 logger .info (
363408 f"GetOperationStatus failed with HTTP error and will be retried: { str (err )} "
364409 )
0 commit comments