Skip to content

Commit d4e1841

Browse files
improve: retries with exponential backoffs
1 parent 9dfaba1 commit d4e1841

File tree

1 file changed

+49
-17
lines changed

1 file changed

+49
-17
lines changed

fast_s3/fetcher.py

Lines changed: 49 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,13 @@
11
import multiprocessing
2+
import random
3+
import time
24
import warnings
35
from pathlib import Path
46
from queue import Empty
5-
from typing import Generator, List, Tuple, Union
7+
from typing import Callable, Generator, List, Tuple, Union
68

79
import boto3
10+
import botocore.exceptions
811

912
from .file import File, Status
1013

@@ -18,10 +21,13 @@ def __init__(
1821
aws_secret_access_key: str,
1922
region_name: str,
2023
bucket_name: str,
21-
buffer_size: int = 1000,
22-
n_workers=32,
23-
worker_batch_size=100,
24-
callback=lambda x: x,
24+
buffer_size: int = 1024,
25+
n_workers: int = 32,
26+
worker_batch_size: int = 128,
27+
n_retries: int = 3,
28+
backoff_factor: float = 0.5,
29+
verbose: bool = False,
30+
callback: Callable = lambda x: x,
2531
ordered: bool = False,
2632
):
2733
self.paths = multiprocessing.Manager().list(list(enumerate(paths))[::-1])
@@ -33,6 +39,9 @@ def __init__(
3339
self.n_workers = n_workers
3440
self.buffer_size = min(buffer_size, len(paths))
3541
self.worker_batch_size = worker_batch_size
42+
self.n_retries = n_retries
43+
self.backoff_factor = backoff_factor
44+
self.verbose = verbose
3645
self.ordered = ordered
3746
self.callback = callback
3847

@@ -58,18 +67,41 @@ def _create_s3_client(self):
5867
def download_batch(self, batch: List[Tuple[int, Union[Path, str]]]):
5968
client = self._create_s3_client()
6069
for index, path in batch:
61-
try:
62-
file = File(
63-
content=self.callback(
64-
client.get_object(Bucket=self.bucket_name, Key=str(path))[
65-
"Body"
66-
].read()
67-
),
68-
path=path,
69-
status=Status.succeeded,
70-
)
71-
except Exception as e:
72-
file = File(content=None, path=path, status=Status.failed, exception=e)
70+
for attempt in range(self.n_retries):
71+
try:
72+
file = File(
73+
content=self.callback(
74+
client.get_object(Bucket=self.bucket_name, Key=str(path))[
75+
"Body"
76+
].read()
77+
),
78+
path=path,
79+
status=Status.succeeded,
80+
)
81+
break
82+
except (
83+
botocore.exceptions.EndpointConnectionError,
84+
botocore.exceptions.NoCredentialsError,
85+
botocore.exceptions.PartialCredentialsError,
86+
botocore.exceptions.SSLError,
87+
botocore.exceptions.ClientError,
88+
botocore.exceptions.BotoCoreError,
89+
ConnectionError,
90+
) as e:
91+
wait_time = self.backoff_factor * (2**attempt) + random.uniform(
92+
0, 1
93+
)
94+
if self.verbose:
95+
print(
96+
f"Retrying {path} due to: {e}. Waiting {wait_time:.2f} seconds before retrying..."
97+
)
98+
time.sleep(wait_time)
99+
file = File(
100+
content=None, path=path, status=Status.failed, exception=e
101+
)
102+
else:
103+
if self.verbose:
104+
print(f"Failed to download {path} after {self.n_retries} retries")
73105
if self.ordered:
74106
self.results[index] = file
75107
else:

0 commit comments

Comments
 (0)