Skip to content

Commit 4ecf18d

Browse files
authored
Support newer versions of smart_open (#167)
* Support newer versions of smart_open
* Fix tests to support newer smart_open
* Fix types
* Remove unneeded smart_open comments
1 parent 74310c5 commit 4ecf18d

File tree

8 files changed

+28
-27
lines changed

8 files changed

+28
-27
lines changed

metrics/bigfiles_high_water_mark

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
1149
1+
1133

metrics/coverage_high_water_mark

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
93.700
1+
93.6900

metrics/mypy_high_water_mark

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
92.3000
1+
92.3100

records_mover/url/s3/s3_file_url.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,17 @@
11
import logging
2+
3+
import smart_open
24
from .s3_base_url import S3BaseUrl
35
from ..base import BaseDirectoryUrl, BaseFileUrl
46
from typing import IO, List, Optional
57
import threading
68
from time import sleep
79
from s3_concat import S3Concat
810
from smart_open.s3 import open as s3_open
11+
import packaging.version
912

13+
SMART_OPEN_VERSION = packaging.version.parse(smart_open.__version__)
14+
SMART_OPEN_USE_SESSION = SMART_OPEN_VERSION < packaging.version.parse("5.0.0")
1015

1116
logger = logging.getLogger(__name__)
1217

@@ -51,10 +56,16 @@ def __call__(self, bytes_amount: int) -> None:
5156

5257
def open(self, mode: str = "rb") -> IO[bytes]:
5358
try:
54-
return s3_open(bucket_id=self.bucket,
55-
key_id=self.key,
56-
mode=mode,
57-
session=self._boto3_session)
59+
if SMART_OPEN_USE_SESSION:
60+
return s3_open(bucket_id=self.bucket,
61+
key_id=self.key,
62+
mode=mode,
63+
session=self._boto3_session)
64+
else:
65+
return s3_open(bucket_id=self.bucket,
66+
key_id=self.key,
67+
mode=mode,
68+
client=self._boto3_session.client('s3'))
5869
except ValueError as e:
5970
# Example: ValueError: 'b0KD9AkG7XA/_manifest' does not
6071
# exist in the bucket 'vince-scratch', or is forbidden

setup.py

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -151,23 +151,7 @@ def initialize_options(self) -> None:
151151
]
152152

153153
smart_open_dependencies = [
154-
# smart_open requires rsa>=3.1.4, which causes pip 20.2.4 in
155-
# default mode to install rsa 4.6.
156-
#
157-
# That then results in this error from pip at install-time:
158-
#
159-
# awscli 1.18.178 requires rsa<=4.5.0,>=3.1.2; python_version !=
160-
# "3.4", but you'll have rsa 4.6 which is incompatible.
161-
#
162-
# Also, pipgrip (used to make Python formulas in Homebrew) takes
163-
# somewhere between much longer and forever to provide its output
164-
# without this line.
165-
#
166-
'rsa>=3.1.4,<=4.5.0',
167-
# we rely on exception types from smart_open,
168-
# which seem to change in feature releases
169-
# without a major version bump
170-
'smart_open>=2,<2.1',
154+
'smart_open>=2',
171155
]
172156

173157
gcs_dependencies = [

tests/unit/url/test_s3_file.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from records_mover.url.s3.s3_file_url import S3FileUrl
1+
from records_mover.url.s3.s3_file_url import S3FileUrl, SMART_OPEN_USE_SESSION
22
from mock import patch, Mock, MagicMock, ANY
33
import unittest
44

@@ -12,6 +12,10 @@ def setUp(self):
1212
boto3_session=self.mock_boto3_session)
1313
self.mock_s3_resource = self.mock_boto3_session.resource.return_value
1414
self.mock_s3_client = self.mock_boto3_session.client.return_value
15+
if SMART_OPEN_USE_SESSION:
16+
self.open_boto_args = {"session": self.mock_boto3_session}
17+
else:
18+
self.open_boto_args = {"client": self.mock_s3_client}
1519

1620
def test_aws_creds(self):
1721
self.assertEqual(self.s3_file_url.aws_creds(),
@@ -61,7 +65,7 @@ def test_wait_to_exist_exists_already(self, mock_s3_open):
6165
mock_s3_open.assert_called_with(bucket_id='bucket',
6266
key_id='topdir/bottomdir/file',
6367
mode='rb',
64-
session=self.mock_boto3_session)
68+
**self.open_boto_args)
6569

6670
@patch('records_mover.url.s3.s3_file_url.s3_open')
6771
def test_wait_to_exist_one_loop(self, mock_s3_open):
@@ -77,7 +81,7 @@ def test_wait_to_exist_one_loop(self, mock_s3_open):
7781
mock_s3_open.assert_called_with(bucket_id='bucket',
7882
key_id='topdir/bottomdir/file',
7983
mode='rb',
80-
session=self.mock_boto3_session)
84+
**self.open_boto_args)
8185

8286
@patch('records_mover.url.s3.s3_file_url.s3_open')
8387
def test_open_other_valueerror_passes_through(self, mock_s3_open):
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
__version__: str

types/stubs/smart_open/s3/__init__.pyi

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ def open(bucket_id: str,
1313
buffer_size: int = DEFAULT_BUFFER_SIZE,
1414
min_part_size: int = DEFAULT_MIN_PART_SIZE,
1515
session: Optional[boto3.session.Session] = None,
16+
client: Optional[boto3.client] = None,
1617
resource_kwargs: Optional[dict] = None,
1718
multipart_upload_kwargs: Optional[Dict] = None) -> IO[bytes]:
1819
...

0 commit comments

Comments
 (0)