Skip to content

Commit e5e31a9

Browse files
authored
Merge branch 'main' into main
2 parents ee035e1 + 3dab457 commit e5e31a9

File tree

9 files changed

+417
-218
lines changed

9 files changed

+417
-218
lines changed

databricks/sdk/core.py

Lines changed: 17 additions & 130 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
import re
2-
import urllib.parse
32
from datetime import timedelta
4-
from json import JSONDecodeError
53
from types import TracebackType
64
from typing import Any, BinaryIO, Iterator, Type
75
from urllib.parse import urlencode
@@ -12,8 +10,8 @@
1210
from .config import *
1311
# To preserve backwards compatibility (as these definitions were previously in this module)
1412
from .credentials_provider import *
15-
from .errors import DatabricksError, error_mapper
16-
from .errors.private_link import _is_private_link_redirect
13+
from .errors import DatabricksError, get_api_error
14+
from .logger import RoundTrip
1715
from .oauth import retrieve_token
1816
from .retries import retried
1917

@@ -262,134 +260,23 @@ def _perform(self,
262260
auth=auth,
263261
stream=raw,
264262
timeout=self._http_timeout_seconds)
265-
try:
266-
self._record_request_log(response, raw=raw or data is not None or files is not None)
267-
if not response.ok: # internally calls response.raise_for_status()
268-
# TODO: experiment with traceback pruning for better readability
269-
# See https://stackoverflow.com/a/58821552/277035
270-
payload = response.json()
271-
raise self._make_nicer_error(response=response, **payload) from None
272-
# Private link failures happen via a redirect to the login page. From a requests-perspective, the request
273-
# is successful, but the response is not what we expect. We need to handle this case separately.
274-
if _is_private_link_redirect(response):
275-
raise self._make_nicer_error(response=response) from None
276-
return response
277-
except requests.exceptions.JSONDecodeError:
278-
message = self._make_sense_from_html(response.text)
279-
if not message:
280-
message = response.reason
281-
raise self._make_nicer_error(response=response, message=message) from None
282-
283-
@staticmethod
284-
def _make_sense_from_html(txt: str) -> str:
285-
matchers = [r'<pre>(.*)</pre>', r'<title>(.*)</title>']
286-
for attempt in matchers:
287-
expr = re.compile(attempt, re.MULTILINE)
288-
match = expr.search(txt)
289-
if not match:
290-
continue
291-
return match.group(1).strip()
292-
return txt
293-
294-
def _make_nicer_error(self, *, response: requests.Response, **kwargs) -> DatabricksError:
295-
status_code = response.status_code
296-
message = kwargs.get('message', 'request failed')
297-
is_http_unauthorized_or_forbidden = status_code in (401, 403)
298-
is_too_many_requests_or_unavailable = status_code in (429, 503)
299-
if is_http_unauthorized_or_forbidden:
300-
message = self._cfg.wrap_debug_info(message)
301-
if is_too_many_requests_or_unavailable:
302-
kwargs['retry_after_secs'] = self._parse_retry_after(response)
303-
kwargs['message'] = message
304-
return error_mapper(response, kwargs)
305-
306-
def _record_request_log(self, response: requests.Response, raw=False):
263+
self._record_request_log(response, raw=raw or data is not None or files is not None)
264+
error = get_api_error(response)
265+
if error is not None:
266+
status_code = response.status_code
267+
is_http_unauthorized_or_forbidden = status_code in (401, 403)
268+
is_too_many_requests_or_unavailable = status_code in (429, 503)
269+
if is_http_unauthorized_or_forbidden:
270+
error.message = self._cfg.wrap_debug_info(error.message)
271+
if is_too_many_requests_or_unavailable:
272+
error.retry_after_secs = self._parse_retry_after(response)
273+
raise error from None
274+
return response
275+
276+
def _record_request_log(self, response: requests.Response, raw: bool = False) -> None:
307277
if not logger.isEnabledFor(logging.DEBUG):
308278
return
309-
request = response.request
310-
url = urllib.parse.urlparse(request.url)
311-
query = ''
312-
if url.query:
313-
query = f'?{urllib.parse.unquote(url.query)}'
314-
sb = [f'{request.method} {urllib.parse.unquote(url.path)}{query}']
315-
if self._cfg.debug_headers:
316-
if self._cfg.host:
317-
sb.append(f'> * Host: {self._cfg.host}')
318-
for k, v in request.headers.items():
319-
sb.append(f'> * {k}: {self._only_n_bytes(v, self._debug_truncate_bytes)}')
320-
if request.body:
321-
sb.append("> [raw stream]" if raw else self._redacted_dump("> ", request.body))
322-
sb.append(f'< {response.status_code} {response.reason}')
323-
if raw and response.headers.get('Content-Type', None) != 'application/json':
324-
# Raw streams with `Transfer-Encoding: chunked` do not have `Content-Type` header
325-
sb.append("< [raw stream]")
326-
elif response.content:
327-
sb.append(self._redacted_dump("< ", response.content))
328-
logger.debug("\n".join(sb))
329-
330-
@staticmethod
331-
def _mask(m: Dict[str, any]):
332-
for k in m:
333-
if k in {'bytes_value', 'string_value', 'token_value', 'value', 'content'}:
334-
m[k] = "**REDACTED**"
335-
336-
@staticmethod
337-
def _map_keys(m: Dict[str, any]) -> List[str]:
338-
keys = list(m.keys())
339-
keys.sort()
340-
return keys
341-
342-
@staticmethod
343-
def _only_n_bytes(j: str, num_bytes: int = 96) -> str:
344-
diff = len(j.encode('utf-8')) - num_bytes
345-
if diff > 0:
346-
return f"{j[:num_bytes]}... ({diff} more bytes)"
347-
return j
348-
349-
def _recursive_marshal_dict(self, m, budget) -> dict:
350-
out = {}
351-
self._mask(m)
352-
for k in sorted(m.keys()):
353-
raw = self._recursive_marshal(m[k], budget)
354-
out[k] = raw
355-
budget -= len(str(raw))
356-
return out
357-
358-
def _recursive_marshal_list(self, s, budget) -> list:
359-
out = []
360-
for i in range(len(s)):
361-
if i > 0 >= budget:
362-
out.append("... (%d additional elements)" % (len(s) - len(out)))
363-
break
364-
raw = self._recursive_marshal(s[i], budget)
365-
out.append(raw)
366-
budget -= len(str(raw))
367-
return out
368-
369-
def _recursive_marshal(self, v: any, budget: int) -> any:
370-
if isinstance(v, dict):
371-
return self._recursive_marshal_dict(v, budget)
372-
elif isinstance(v, list):
373-
return self._recursive_marshal_list(v, budget)
374-
elif isinstance(v, str):
375-
return self._only_n_bytes(v, self._debug_truncate_bytes)
376-
else:
377-
return v
378-
379-
def _redacted_dump(self, prefix: str, body: str) -> str:
380-
if len(body) == 0:
381-
return ""
382-
try:
383-
# Unmarshal body into primitive types.
384-
tmp = json.loads(body)
385-
max_bytes = 96
386-
if self._debug_truncate_bytes > max_bytes:
387-
max_bytes = self._debug_truncate_bytes
388-
# Re-marshal body taking redaction and character limit into account.
389-
raw = self._recursive_marshal(tmp, max_bytes)
390-
return "\n".join([f'{prefix}{line}' for line in json.dumps(raw, indent=2).split("\n")])
391-
except JSONDecodeError:
392-
return f'{prefix}[non-JSON document of {len(body)} bytes]'
279+
logger.debug(RoundTrip(response, self._cfg.debug_headers, self._debug_truncate_bytes, raw).generate())
393280

394281

395282
class StreamingResponse(BinaryIO):

databricks/sdk/errors/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from .base import DatabricksError, ErrorDetail
2-
from .mapper import error_mapper
2+
from .mapper import _error_mapper
3+
from .parser import get_api_error
34
from .platform import *
45
from .private_link import PrivateLinkValidationError
56
from .sdk import *

databricks/sdk/errors/base.py

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import re
2+
import warnings
23
from dataclasses import dataclass
34
from typing import Dict, List, Optional
45

@@ -41,9 +42,38 @@ def __init__(self,
4142
retry_after_secs: int = None,
4243
details: List[Dict[str, any]] = None,
4344
**kwargs):
45+
"""
46+
47+
:param message:
48+
:param error_code:
49+
:param detail: [Deprecated]
50+
:param status: [Deprecated]
51+
:param scimType: [Deprecated]
52+
:param error: [Deprecated]
53+
:param retry_after_secs:
54+
:param details:
55+
:param kwargs:
56+
"""
57+
# SCIM-specific parameters are deprecated
58+
if detail:
59+
warnings.warn(
60+
"The 'detail' parameter of DatabricksError is deprecated and will be removed in a future version."
61+
)
62+
if scimType:
63+
warnings.warn(
64+
"The 'scimType' parameter of DatabricksError is deprecated and will be removed in a future version."
65+
)
66+
if status:
67+
warnings.warn(
68+
"The 'status' parameter of DatabricksError is deprecated and will be removed in a future version."
69+
)
70+
71+
# API 1.2-specific parameters are deprecated
4472
if error:
45-
# API 1.2 has different response format, let's adapt
46-
message = error
73+
warnings.warn(
74+
"The 'error' parameter of DatabricksError is deprecated and will be removed in a future version."
75+
)
76+
4777
if detail:
4878
# Handle SCIM error message details
4979
# @see https://tools.ietf.org/html/rfc7644#section-3.7.3

databricks/sdk/errors/mapper.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,9 @@
44
from databricks.sdk.errors.base import DatabricksError
55

66
from .overrides import _ALL_OVERRIDES
7-
from .private_link import (_get_private_link_validation_error,
8-
_is_private_link_redirect)
97

108

11-
def error_mapper(response: requests.Response, raw: dict) -> DatabricksError:
9+
def _error_mapper(response: requests.Response, raw: dict) -> DatabricksError:
1210
for override in _ALL_OVERRIDES:
1311
if override.matches(response, raw):
1412
return override.custom_error(**raw)
@@ -23,8 +21,6 @@ def error_mapper(response: requests.Response, raw: dict) -> DatabricksError:
2321
# where there's a default exception class per HTTP status code, and we do
2422
# rely on Databricks platform exception mapper to do the right thing.
2523
return platform.STATUS_CODE_MAPPING[status_code](**raw)
26-
if _is_private_link_redirect(response):
27-
return _get_private_link_validation_error(response.url)
2824

2925
# backwards-compatible error creation for cases like using older versions of
3026
# the SDK on way never releases of the platform.

databricks/sdk/errors/parser.py

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
import abc
2+
import json
3+
import logging
4+
import re
5+
from typing import Optional
6+
7+
import requests
8+
9+
from ..logger import RoundTrip
10+
from .base import DatabricksError
11+
from .mapper import _error_mapper
12+
from .private_link import (_get_private_link_validation_error,
13+
_is_private_link_redirect)
14+
15+
16+
class _ErrorParser(abc.ABC):
17+
"""A parser for errors from the Databricks REST API."""
18+
19+
@abc.abstractmethod
20+
def parse_error(self, response: requests.Response, response_body: bytes) -> Optional[dict]:
21+
"""Parses an error from the Databricks REST API. If the error cannot be parsed, returns None."""
22+
23+
24+
class _EmptyParser(_ErrorParser):
25+
"""A parser that handles empty responses."""
26+
27+
def parse_error(self, response: requests.Response, response_body: bytes) -> Optional[dict]:
28+
if len(response_body) == 0:
29+
return {'message': response.reason}
30+
return None
31+
32+
33+
class _StandardErrorParser(_ErrorParser):
34+
"""
35+
Parses errors from the Databricks REST API using the standard error format.
36+
"""
37+
38+
def parse_error(self, response: requests.Response, response_body: bytes) -> Optional[dict]:
39+
try:
40+
payload_str = response_body.decode('utf-8')
41+
resp: dict = json.loads(payload_str)
42+
except json.JSONDecodeError as e:
43+
logging.debug('_StandardErrorParser: unable to deserialize response as json', exc_info=e)
44+
return None
45+
46+
error_args = {
47+
'message': resp.get('message', 'request failed'),
48+
'error_code': resp.get('error_code'),
49+
'details': resp.get('details'),
50+
}
51+
52+
# Handle API 1.2-style errors
53+
if 'error' in resp:
54+
error_args['message'] = resp['error']
55+
56+
# Handle SCIM Errors
57+
detail = resp.get('detail')
58+
status = resp.get('status')
59+
scim_type = resp.get('scimType')
60+
if detail:
61+
# Handle SCIM error message details
62+
# @see https://tools.ietf.org/html/rfc7644#section-3.7.3
63+
error_args[
64+
'message'] = f"{scim_type} {error_args.get('message', 'SCIM API Internal Error')}".strip(" ")
65+
error_args['error_code'] = f"SCIM_{status}"
66+
return error_args
67+
68+
69+
class _StringErrorParser(_ErrorParser):
70+
"""
71+
Parses errors from the Databricks REST API in the format "ERROR_CODE: MESSAGE".
72+
"""
73+
74+
__STRING_ERROR_REGEX = re.compile(r'([A-Z_]+): (.*)')
75+
76+
def parse_error(self, response: requests.Response, response_body: bytes) -> Optional[dict]:
77+
payload_str = response_body.decode('utf-8')
78+
match = self.__STRING_ERROR_REGEX.match(payload_str)
79+
if not match:
80+
logging.debug('_StringErrorParser: unable to parse response as string')
81+
return None
82+
error_code, message = match.groups()
83+
return {'error_code': error_code, 'message': message, 'status': response.status_code, }
84+
85+
86+
class _HtmlErrorParser(_ErrorParser):
87+
"""
88+
Parses errors from the Databricks REST API in HTML format.
89+
"""
90+
91+
__HTML_ERROR_REGEXES = [re.compile(r'<pre>(.*)</pre>'), re.compile(r'<title>(.*)</title>'), ]
92+
93+
def parse_error(self, response: requests.Response, response_body: bytes) -> Optional[dict]:
94+
payload_str = response_body.decode('utf-8')
95+
for regex in self.__HTML_ERROR_REGEXES:
96+
match = regex.search(payload_str)
97+
if match:
98+
message = match.group(1) if match.group(1) else response.reason
99+
return {
100+
'status': response.status_code,
101+
'message': message,
102+
'error_code': response.reason.upper().replace(' ', '_')
103+
}
104+
logging.debug('_HtmlErrorParser: no <pre> tag found in error response')
105+
return None
106+
107+
108+
# A list of ErrorParsers that are tried in order to parse an API error from a response body. Most errors should be
109+
# parsable by the _StandardErrorParser, but additional parsers can be added here for specific error formats. The order
110+
# of the parsers is not important, as the set of errors that can be parsed by each parser should be disjoint.
111+
_error_parsers = [_EmptyParser(), _StandardErrorParser(), _StringErrorParser(), _HtmlErrorParser(), ]
112+
113+
114+
def _unknown_error(response: requests.Response) -> str:
115+
"""A standard error message that can be shown when an API response cannot be parsed.
116+
117+
This error message includes a link to the issue tracker for the SDK for users to report the issue to us.
118+
"""
119+
request_log = RoundTrip(response, debug_headers=True, debug_truncate_bytes=10 * 1024).generate()
120+
return (
121+
'This is likely a bug in the Databricks SDK for Python or the underlying '
122+
'API. Please report this issue with the following debugging information to the SDK issue tracker at '
123+
f'https://github.com/databricks/databricks-sdk-go/issues. Request log:```{request_log}```')
124+
125+
126+
def get_api_error(response: requests.Response) -> Optional[DatabricksError]:
127+
"""
128+
Handles responses from the REST API and returns a DatabricksError if the response indicates an error.
129+
:param response: The response from the REST API.
130+
:return: A DatabricksError if the response indicates an error, otherwise None.
131+
"""
132+
if not response.ok:
133+
content = response.content
134+
for parser in _error_parsers:
135+
try:
136+
error_args = parser.parse_error(response, content)
137+
if error_args:
138+
return _error_mapper(response, error_args)
139+
except Exception as e:
140+
logging.debug(f'Error parsing response with {parser}, continuing', exc_info=e)
141+
return _error_mapper(response, {'message': 'unable to parse response. ' + _unknown_error(response)})
142+
143+
# Private link failures happen via a redirect to the login page. From a requests-perspective, the request
144+
# is successful, but the response is not what we expect. We need to handle this case separately.
145+
if _is_private_link_redirect(response):
146+
return _get_private_link_validation_error(response.url)

0 commit comments

Comments
 (0)