Skip to content

Commit ed92c40

Browse files
committed
splitting up in modules
1 parent f47e5d6 commit ed92c40

File tree

11 files changed

+222
-165
lines changed

11 files changed

+222
-165
lines changed

src/cradl/backoff.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
import functools
2+
import time
3+
from typing import Union, Type, Callable
4+
5+
6+
def exponential_backoff(
    exceptions: Union[tuple[Type[Exception], ...], Type[Exception]],
    base_wait: int = 1,
    max_time: Union[float, None] = None,
    max_tries: Union[int, None] = None,
    rate: int = 2,
    giveup: Callable = lambda e: False,
) -> Callable:
    """Return a decorator that wraps a target in a retry loop.

    Adapted from https://github.com/litl/backoff

    Backoff is exponential: the wait before the next attempt is
    ``base_wait * rate ** trial_no`` seconds, where ``trial_no`` starts at 0.

    :param exceptions: Exception class (or tuple of classes) that triggers a retry;
        any other exception propagates immediately.
    :param base_wait: Wait in seconds after the first failed attempt.
    :param max_time: Time budget in seconds; the exception is re-raised when the
        elapsed time plus the next wait would exceed it.
    :param max_tries: Maximum number of calls to the target.
    :param rate: Base of the exponential growth of the wait.
    :param giveup: Predicate called with the caught exception; a truthy result
        re-raises it without further retries.
    :return: A decorator to apply to the target callable.
    :raises ValueError: If neither ``max_time`` nor ``max_tries`` is set.
    """
    if not max_time and not max_tries:
        raise ValueError('Must set at least one of max_time or max_tries')

    def decorate(target):
        @functools.wraps(target)
        def retry(*args, **kwargs):
            # A monotonic clock is used so that system clock adjustments
            # cannot shrink or stretch the max_time budget.
            start = time.monotonic()
            trial_no = 0
            while True:
                try:
                    return target(*args, **kwargs)
                except exceptions as e:
                    elapsed = time.monotonic() - start
                    next_wait_seconds = base_wait * rate ** trial_no
                    last_try_done = max_tries and trial_no == max_tries - 1
                    # Give up early instead of sleeping past the time budget.
                    no_time_for_next_try = max_time and (elapsed + next_wait_seconds > max_time)
                    if giveup(e) or last_try_done or no_time_for_next_try:
                        raise

                    time.sleep(next_wait_seconds)
                trial_no += 1
        return retry
    return decorate

src/cradl/client.py

Lines changed: 15 additions & 151 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,23 @@
1-
import binascii
2-
import filetype
31
import io
42
import json
5-
import logging
6-
from base64 import b64encode, b64decode
3+
from base64 import b64encode
74
from datetime import datetime
8-
from functools import singledispatch
95
from pathlib import Path
10-
from json.decoder import JSONDecodeError
11-
from typing import Any, Callable, Dict, List, Optional, Sequence, Union
6+
7+
from typing import Callable, Dict, List, Optional, Sequence, Union
128
from urllib.parse import urlparse, quote
139

1410
import requests
15-
from backoff import expo, on_exception # type: ignore
1611
from requests.exceptions import RequestException
1712

1813
from .credentials import Credentials, guess_credentials
14+
from .content import parse_content
15+
from .log import setup_logging
16+
from .backoff import exponential_backoff
17+
from .response import decode_response, TooManyRequestsException, EmptyRequestError
1918

2019

21-
logger = logging.getLogger(__name__)
22-
handler = logging.StreamHandler()
23-
handler.setFormatter(logging.Formatter('%(asctime)s %(name)-12s %(levelname)-8s %(message)s'))
24-
logger.addHandler(handler)
25-
20+
logger = setup_logging(__name__)
2621
Content = Union[bytes, bytearray, str, Path, io.IOBase]
2722
Queryparam = Union[str, List[str]]
2823

@@ -44,146 +39,15 @@ def _fatal_code(e):
4439
return 400 <= e.response.status_code < 500
4540

4641

47-
def _decode_response(response, return_json=True):
48-
try:
49-
response.raise_for_status()
50-
if return_json:
51-
return response.json()
52-
else:
53-
return response.content
54-
except JSONDecodeError as e:
55-
56-
if response.status_code == 204:
57-
return {'Your request executed successfully': '204'}
58-
59-
logger.error('Status code {} body:\n{}'.format(response.status_code, response.text))
60-
raise e
61-
except Exception as e:
62-
logger.error('Status code {} body:\n{}'.format(response.status_code, response.text))
63-
64-
if response.status_code == 400:
65-
message = response.json().get('message', response.text)
66-
raise BadRequest(message)
67-
68-
if response.status_code == 403 and 'Forbidden' in response.json().values():
69-
raise InvalidCredentialsException('Credentials provided are not valid.')
70-
71-
if response.status_code == 404:
72-
message = response.json().get('message', response.text)
73-
raise NotFound(message)
74-
75-
if response.status_code == 429 and 'Too Many Requests' in response.json().values():
76-
raise TooManyRequestsException('You have reached the limit of requests per second.')
77-
78-
if response.status_code == 429 and 'Limit Exceeded' in response.json().values():
79-
raise LimitExceededException('You have reached the limit of total requests per month.')
80-
81-
raise e
82-
83-
84-
def _guess_content_type(raw):
85-
guessed_type = filetype.guess(raw)
86-
assert guessed_type, 'Could not determine content type of document. ' \
87-
'Please provide it by specifying content_type'
88-
return guessed_type.mime
89-
90-
91-
def _parsed_content(raw, find_content_type, base_64_encode):
92-
content_type = _guess_content_type(raw) if find_content_type else None
93-
parsed_content = b64encode(raw).decode() if base_64_encode else raw
94-
return parsed_content, content_type
95-
96-
97-
@singledispatch
98-
def parse_content(content, find_content_type=False, base_64_encode=True):
99-
raise TypeError(
100-
'\n'.join([
101-
f'Could not parse content {content} of type {type(content)}',
102-
'Specify content by using one of the options below:',
103-
'1. Path to a file either as a string or as a Path object',
104-
'2. Bytes object with b64encoding',
105-
'3. Bytes object without b64encoding',
106-
'4. IO Stream of either bytes or text',
107-
])
108-
)
109-
110-
111-
@parse_content.register(str)
112-
@parse_content.register(Path)
113-
def _(content, find_content_type=False, base_64_encode=True):
114-
raw = Path(content).read_bytes()
115-
return _parsed_content(raw, find_content_type, base_64_encode)
116-
117-
118-
@parse_content.register(bytes)
119-
@parse_content.register(bytearray)
120-
def _(content, find_content_type=False, base_64_encode=True):
121-
try:
122-
raw = b64decode(content, validate=True)
123-
except binascii.Error:
124-
raw = content
125-
return _parsed_content(raw, find_content_type, base_64_encode)
126-
127-
128-
@parse_content.register(io.IOBase)
129-
def _(content, find_content_type=False, base_64_encode=True):
130-
raw = content.read()
131-
raw = raw.encode() if isinstance(raw, str) else raw
132-
return _parsed_content(raw, find_content_type, base_64_encode)
133-
134-
135-
class EmptyRequestError(ValueError):
136-
"""An EmptyRequestError is raised if the request body is empty when expected not to be empty."""
137-
pass
138-
139-
140-
class ClientException(Exception):
141-
"""A ClientException is raised if the client refuses to
142-
send request due to incorrect usage or bad request data."""
143-
pass
144-
145-
146-
class InvalidCredentialsException(ClientException):
147-
"""An InvalidCredentialsException is raised if api key, access key id or secret access key is invalid."""
148-
pass
149-
150-
151-
class TooManyRequestsException(ClientException):
152-
"""A TooManyRequestsException is raised if you have reached the number of requests per second limit
153-
associated with your credentials."""
154-
pass
155-
156-
157-
class LimitExceededException(ClientException):
158-
"""A LimitExceededException is raised if you have reached the limit of total requests per month
159-
associated with your credentials."""
160-
pass
161-
162-
163-
class BadRequest(ClientException):
164-
"""BadRequest is raised if you have made a request that is disqualified based on the input"""
165-
pass
166-
167-
168-
class NotFound(ClientException):
169-
"""NotFound is raised when you try to access a resource that is not found"""
170-
pass
171-
172-
173-
class FileFormatException(ClientException):
174-
"""A FileFormatException is raised if the file format is not supported by the api."""
175-
pass
176-
177-
17842
class Client:
17943
"""A low level client to invoke api methods from Cradl."""
18044
def __init__(self, credentials: Optional[Credentials] = None, profile=None):
18145
""":param credentials: Credentials to use, instance of :py:class:`~cradl.Credentials`
18246
:type credentials: Credentials"""
18347
self.credentials = credentials or guess_credentials(profile)
18448

185-
@on_exception(expo, TooManyRequestsException, max_tries=4)
186-
@on_exception(expo, RequestException, max_tries=3, giveup=_fatal_code)
49+
@exponential_backoff(TooManyRequestsException, max_tries=4)
50+
@exponential_backoff(RequestException, max_tries=3, giveup=_fatal_code)
18751
def _make_request(
18852
self,
18953
requests_fn: Callable,
@@ -212,10 +76,10 @@ def _make_request(
21276
headers=headers,
21377
**kwargs,
21478
)
215-
return _decode_response(response)
79+
return decode_response(response)
21680

217-
@on_exception(expo, TooManyRequestsException, max_tries=4)
218-
@on_exception(expo, RequestException, max_tries=3, giveup=_fatal_code)
81+
@exponential_backoff(TooManyRequestsException, max_tries=4)
82+
@exponential_backoff(RequestException, max_tries=3, giveup=_fatal_code)
21983
def _make_fileserver_request(
22084
self,
22185
requests_fn: Callable,
@@ -237,7 +101,7 @@ def _make_fileserver_request(
237101
headers=headers,
238102
**kwargs,
239103
)
240-
return _decode_response(response, return_json=False)
104+
return decode_response(response, return_json=False)
241105

242106
def create_app_client(
243107
self,
@@ -943,7 +807,7 @@ def get_document(
943807
def update_document(
944808
self,
945809
document_id: str,
946-
ground_truth: Sequence[Dict[str, Union[Optional[str], bool]]] = None, # For backwards compatibility reasons, this is placed before the *
810+
ground_truth: Sequence[Dict[str, Union[Optional[str], bool]]] = None, # For backwards compatibility reasons, this is placed before the *
947811
*,
948812
metadata: Optional[dict] = None,
949813
dataset_id: str = None,

src/cradl/content.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
import io
2+
import functools
3+
import binascii
4+
import filetype
5+
from base64 import b64encode, b64decode
6+
from pathlib import Path
7+
8+
9+
def _guess_content_type(raw):
    """Return the MIME type that ``filetype.guess`` infers for ``raw``.

    :param raw: Document content handed to ``filetype.guess``.
    :return: The ``mime`` attribute of the guessed type.
    :raises AssertionError: If no type could be guessed.
    """
    guessed_type = filetype.guess(raw)
    if not guessed_type:
        # Raised explicitly instead of via `assert` so the check still runs
        # under `python -O`; the exception type is kept for existing callers.
        raise AssertionError(
            'Could not determine content type of document. '
            'Please provide it by specifying content_type'
        )
    return guessed_type.mime
14+
15+
16+
def _parsed_content(raw, find_content_type, base_64_encode):
17+
content_type = _guess_content_type(raw) if find_content_type else None
18+
parsed_content = b64encode(raw).decode() if base_64_encode else raw
19+
return parsed_content, content_type
20+
21+
22+
@functools.singledispatch
def parse_content(content, find_content_type=False, base_64_encode=True):
    """Fallback of the ``parse_content`` dispatcher for unsupported content types.

    Supported types are handled by the implementations registered on this
    function; anything else ends up here.

    :raises TypeError: Always, with a message listing the accepted inputs.
    """
    message_lines = [
        f'Could not parse content {content} of type {type(content)}',
        'Specify content by using one of the options below:',
        '1. Path to a file either as a string or as a Path object',
        '2. Bytes object with b64encoding',
        '3. Bytes object without b64encoding',
        '4. IO Stream of either bytes or text',
    ]
    raise TypeError('\n'.join(message_lines))
34+
35+
36+
@parse_content.register(str)
@parse_content.register(Path)
def _(content, find_content_type=False, base_64_encode=True):
    """Parse content given as a file path (``str`` or ``Path``) by reading the file's bytes."""
    source = Path(content)
    file_bytes = source.read_bytes()
    return _parsed_content(file_bytes, find_content_type, base_64_encode)
41+
42+
43+
@parse_content.register(bytes)
@parse_content.register(bytearray)
def _(content, find_content_type=False, base_64_encode=True):
    """Parse content given as ``bytes`` or ``bytearray``.

    Input that validates as base64 is decoded first; anything else is used as is.
    """
    data = content
    try:
        data = b64decode(content, validate=True)
    except binascii.Error:
        # Not base64: keep the bytes exactly as they were passed in.
        pass
    return _parsed_content(data, find_content_type, base_64_encode)
51+
52+
53+
@parse_content.register(io.IOBase)
def _(content, find_content_type=False, base_64_encode=True):
    """Parse content given as an IO stream; text read from the stream is encoded to bytes."""
    data = content.read()
    if isinstance(data, str):
        data = data.encode()
    return _parsed_content(data, find_content_type, base_64_encode)

src/cradl/credentials.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import json
2-
import logging
32
import os
43
import time
54
from os.path import exists, expanduser
@@ -9,7 +8,10 @@
98
import requests
109
from requests.auth import HTTPBasicAuth
1110

11+
from .log import setup_logging
1212

13+
14+
logger = setup_logging(__name__)
1315
NULL_TOKEN = '', 0
1416

1517

@@ -82,7 +84,7 @@ def read_token_from_cache(cached_profile: str, cache_path: Path):
8284
cache = json.loads(cache_path.read_text())
8385
return cache[cached_profile]['access_token'], cache[cached_profile]['expires_in']
8486
except Exception as e:
85-
logging.warning(e)
87+
logger.warning(e)
8688

8789
return NULL_TOKEN
8890

src/cradl/log.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
import logging
2+
3+
4+
def setup_logging(name):
    """Return the logger called ``name`` with a formatted stream handler attached.

    The handler is attached only once per logger, so calling this function
    repeatedly with the same name does not duplicate log records.

    :param name: Logger name passed to ``logging.getLogger``.
    :return: The configured ``logging.Logger``.
    """
    logger = logging.getLogger(name)

    # logging.getLogger returns the same object for the same name, so without
    # this guard every call would stack another handler on it.
    if any(getattr(existing, '_cradl_default_handler', False) for existing in logger.handlers):
        return logger

    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter('%(asctime)s %(name)-12s %(levelname)-8s %(message)s'))
    # Marker used by the guard above to recognise the handler added here.
    handler._cradl_default_handler = True
    logger.addHandler(handler)
    return logger

0 commit comments

Comments
 (0)