Skip to content

Commit 9001630

Browse files
authored
[v2 ] Expose CRT S3 file IO options (aws#9838)
1 parent 22ad7ba commit 9001630

File tree

8 files changed

+194
-3
lines changed

8 files changed

+194
-3
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
"type": "enhancement",
3+
"category": "awscrt",
4+
"description": "Exposes new CRT options for S3 file IO"
5+
}

awscli/customizations/s3/factory.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,15 @@ def _create_crt_client(self, params, runtime_config):
138138
create_crt_client_kwargs['crt_credentials_provider'] = (
139139
crt_credentials_provider
140140
)
141+
fio_options = {}
142+
if (val := runtime_config.get('should_stream')) is not None:
143+
fio_options['should_stream'] = val
144+
if (val := runtime_config.get('disk_throughput')) is not None:
145+
# Convert bytes to gigabits.
146+
fio_options['disk_throughput_gbps'] = val * 8 / 1_000_000_000
147+
if (val := runtime_config.get('direct_io')) is not None:
148+
fio_options['direct_io'] = val
149+
create_crt_client_kwargs['fio_options'] = fio_options
141150

142151
return create_s3_crt_client(**create_crt_client_kwargs)
143152

awscli/customizations/s3/transferconfig.py

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
# commands.
1616
import logging
1717

18+
from botocore.utils import ensure_boolean
1819
from s3transfer.manager import TransferConfig
1920

2021
from awscli.customizations.s3 import constants
@@ -31,6 +32,9 @@
3132
'preferred_transfer_client': constants.AUTO_RESOLVE_TRANSFER_CLIENT,
3233
'target_bandwidth': None,
3334
'io_chunksize': 256 * 1024,
35+
'should_stream': None,
36+
'disk_throughput': None,
37+
'direct_io': None,
3438
}
3539

3640

@@ -47,9 +51,18 @@ class RuntimeConfig:
4751
'max_bandwidth',
4852
'target_bandwidth',
4953
'io_chunksize',
54+
'disk_throughput',
55+
]
56+
HUMAN_READABLE_SIZES = [
57+
'multipart_chunksize',
58+
'multipart_threshold',
59+
'io_chunksize',
60+
]
61+
HUMAN_READABLE_RATES = [
62+
'max_bandwidth',
63+
'target_bandwidth',
64+
'disk_throughput',
5065
]
51-
HUMAN_READABLE_SIZES = ['multipart_chunksize', 'multipart_threshold', 'io_chunksize']
52-
HUMAN_READABLE_RATES = ['max_bandwidth', 'target_bandwidth']
5366
SUPPORTED_CHOICES = {
5467
'preferred_transfer_client': [
5568
constants.AUTO_RESOLVE_TRANSFER_CLIENT,
@@ -62,6 +75,7 @@ class RuntimeConfig:
6275
'default': constants.CLASSIC_TRANSFER_CLIENT
6376
}
6477
}
78+
BOOLEANS = ['should_stream', 'direct_io']
6579

6680
@staticmethod
6781
def defaults():
@@ -83,6 +97,7 @@ def build_config(self, **kwargs):
8397
runtime_config.update(kwargs)
8498
self._convert_human_readable_sizes(runtime_config)
8599
self._convert_human_readable_rates(runtime_config)
100+
self._convert_booleans(runtime_config)
86101
self._resolve_choice_aliases(runtime_config)
87102
self._validate_config(runtime_config)
88103
return runtime_config
@@ -116,6 +131,12 @@ def _convert_human_readable_rates(self, runtime_config):
116131
'second (e.g. 10Mb/s or 800Kb/s)' % value
117132
)
118133

134+
def _convert_booleans(self, runtime_config):
135+
for attr in self.BOOLEANS:
136+
value = runtime_config.get(attr)
137+
if value is not None:
138+
runtime_config[attr] = ensure_boolean(value)
139+
119140
def _human_readable_rate_to_int(self, value):
120141
# The human_readable_to_int() utility only supports integers (e.g. 1024)
121142
# as strings and human readable sizes (e.g. 10MB, 5GB). It does not

awscli/s3transfer/crt.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
)
3434
from awscrt.s3 import (
3535
S3Client,
36+
S3FileIoOptions,
3637
S3RequestTlsMode,
3738
S3RequestType,
3839
S3ResponseError,
@@ -87,6 +88,7 @@ def create_s3_crt_client(
8788
part_size=8 * MB,
8889
use_ssl=True,
8990
verify=None,
91+
fio_options=None,
9092
):
9193
"""
9294
:type region: str
@@ -130,6 +132,9 @@ def create_s3_crt_client(
130132
* path/to/cert/bundle.pem - A filename of the CA cert bundle to
131133
use. Specify this argument if you want to use a custom CA cert
132134
bundle instead of the default one on your system.
135+
136+
:type fio_options: Optional[dict]
137+
:param fio_options: Kwargs to use to build an `awscrt.s3.S3FileIoOptions`.
133138
"""
134139

135140
event_loop_group = EventLoopGroup(num_threads)
@@ -153,6 +158,9 @@ def create_s3_crt_client(
153158
target_gbps = _get_crt_throughput_target_gbps(
154159
provided_throughput_target_bytes=target_throughput
155160
)
161+
crt_fio_options = None
162+
if fio_options:
163+
crt_fio_options = S3FileIoOptions(**fio_options)
156164
return S3Client(
157165
bootstrap=bootstrap,
158166
region=region,
@@ -162,6 +170,7 @@ def create_s3_crt_client(
162170
tls_connection_options=tls_connection_options,
163171
throughput_target_gbps=target_gbps,
164172
enable_s3express=True,
173+
fio_options=crt_fio_options,
165174
)
166175

167176

awscli/topics/s3-config.rst

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -382,6 +382,50 @@ adjustments mid-transfer command in order to increase throughput and reach the
382382
requested bandwidth.
383383

384384

385+
should_stream
386+
-------------
387+
.. note::
388+
This configuration option is only supported when the ``preferred_transfer_client``
389+
configuration value is set to or resolves to ``crt``. The ``classic`` transfer
390+
client does not support this configuration option.
391+
392+
**Default** - ``false``
393+
394+
If set to ``true``, the CRT client will skip buffering parts in memory before
395+
sending PUT requests.
396+
397+
398+
disk_throughput
399+
---------------
400+
.. note::
401+
This configuration option is only supported when the ``preferred_transfer_client``
402+
configuration value is set to or resolves to ``crt``. The ``classic`` transfer
403+
client does not support this configuration option.
404+
405+
**Default** - ``10.0`` gigabits per second
406+
407+
The estimated target disk throughput. This value is only applied if
408+
``should_stream`` is set to ``true``. This value can be specified using
409+
the same semantics as ``target_bandwidth``, that is either as the
410+
number of bytes per second as an integer, or using a rate suffix.
411+
412+
413+
direct_io
414+
---------
415+
.. note::
416+
This configuration option is only supported when the ``preferred_transfer_client``
417+
configuration value is set to or resolves to ``crt``. The ``classic`` transfer
418+
client does not support this configuration option.
419+
420+
.. note::
421+
This configuration option is only supported on Linux.
422+
423+
**Default** - ``false``
424+
425+
If set to ``true``, the CRT client will enable direct IO to bypass the OS
426+
cache when sending PUT requests. Enabling direct IO may be useful in cases
427+
where the disk IO outperforms the kernel cache.
428+
385429
Experimental Configuration Values
386430
=================================
387431

tests/unit/customizations/s3/test_factory.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
import awscrt.s3
1414
import pytest
1515
import s3transfer.crt
16-
from awscrt.s3 import S3RequestTlsMode
16+
from awscrt.s3 import S3FileIoOptions, S3RequestTlsMode
1717
from botocore.config import Config
1818
from botocore.credentials import Credentials
1919
from botocore.httpsession import DEFAULT_CA_BUNDLE
@@ -483,6 +483,27 @@ def test_target_bandwidth_configure_for_crt_manager(self, mock_crt_client):
483483
self.assert_is_crt_manager(transfer_manager)
484484
self.assert_expected_throughput_target_gbps(mock_crt_client, 8)
485485

486+
@mock.patch('s3transfer.crt.S3Client')
487+
def test_fio_options_configure_for_crt_manager(self, mock_crt_client):
488+
self.runtime_config = self.get_runtime_config(
489+
preferred_transfer_client='crt',
490+
should_stream=True,
491+
disk_throughput=1000**3,
492+
direct_io=True,
493+
)
494+
transfer_manager = self.factory.create_transfer_manager(
495+
self.params, self.runtime_config
496+
)
497+
expected_fio_options = S3FileIoOptions(
498+
should_stream=True,
499+
disk_throughput_gbps=8.0,
500+
direct_io=True,
501+
)
502+
self.assert_is_crt_manager(transfer_manager)
503+
self.assertEqual(
504+
mock_crt_client.call_args[1]['fio_options'], expected_fio_options
505+
)
506+
486507
@mock.patch('s3transfer.crt.get_recommended_throughput_target_gbps')
487508
@mock.patch('s3transfer.crt.S3Client')
488509
def test_target_bandwidth_uses_crt_recommended_throughput(

tests/unit/customizations/s3/test_transferconfig.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,12 @@ def test_set_preferred_transfer_client(self, provided, resolved):
103103
('target_bandwidth', '1000', 1000),
104104
('target_bandwidth', '1000B/s', 1000),
105105
('target_bandwidth', '8000b/s', 1000),
106+
# disk_throughput cases
107+
('disk_throughput', '1MB/s', 1024 * 1024),
108+
('disk_throughput', '10Mb/s', 10 * 1024 * 1024 / 8),
109+
('disk_throughput', '1000', 1000),
110+
('disk_throughput', '1000B/s', 1000),
111+
('disk_throughput', '8000b/s', 1000),
106112
],
107113
)
108114
def test_rate_conversions(self, config_name, provided, expected):
@@ -127,6 +133,13 @@ def test_rate_conversions(self, config_name, provided, expected):
127133
('target_bandwidth', '100/s'),
128134
('target_bandwidth', ''),
129135
('target_bandwidth', 'value-with-no-digits'),
136+
# disk_throughput cases
137+
('disk_throughput', '1MB'),
138+
('disk_throughput', '1B'),
139+
('disk_throughput', '1b'),
140+
('disk_throughput', '100/s'),
141+
('disk_throughput', ''),
142+
('disk_throughput', 'value-with-no-digits'),
130143
],
131144
)
132145
def test_invalid_rate_values(self, config_name, provided):
@@ -138,6 +151,22 @@ def test_validates_preferred_transfer_client_choices(self):
138151
with pytest.raises(transferconfig.InvalidConfigError):
139152
self.build_config_with(preferred_transfer_client='not-supported')
140153

154+
@pytest.mark.parametrize(
155+
'attr,val,expected',
156+
[
157+
('should_stream', 'true', True),
158+
('should_stream', 'false', False),
159+
('should_stream', None, None),
160+
('direct_io', 'true', True),
161+
('direct_io', 'false', False),
162+
('direct_io', None, None),
163+
],
164+
)
165+
def test_convert_booleans(self, attr, val, expected):
166+
params = {attr: val}
167+
runtime_config = self.build_config_with(**params)
168+
assert runtime_config[attr] == expected
169+
141170

142171
class TestConvertToS3TransferConfig:
143172
def test_convert(self):

tests/unit/s3transfer/test_crt.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from botocore.credentials import Credentials, ReadOnlyCredentials
1717
from botocore.exceptions import ClientError, NoCredentialsError
1818
from botocore.session import Session
19+
from s3transfer.constants import GB
1920
from s3transfer.exceptions import TransferNotDoneError
2021
from s3transfer.utils import CallArgs
2122

@@ -365,3 +366,55 @@ def test_target_throughput(
365366
def test_always_enables_s3express(self, mock_s3_crt_client):
366367
s3transfer.crt.create_s3_crt_client('us-west-2')
367368
assert mock_s3_crt_client.call_args[1]['enable_s3express'] is True
369+
370+
@pytest.mark.parametrize(
371+
'fio_options,should_stream,disk_throughput,direct_io',
372+
[
373+
({'should_stream': True}, True, 0.0, False),
374+
({'disk_throughput_gbps': 8}, False, 8, False),
375+
({'direct_io': True}, False, 0.0, True),
376+
(
377+
{'should_stream': True, 'disk_throughput_gbps': 8},
378+
True,
379+
8,
380+
False,
381+
),
382+
({'should_stream': True, 'direct_io': True}, True, 0.0, True),
383+
({'disk_throughput_gbps': 8, 'direct_io': True}, False, 8, True),
384+
(
385+
{
386+
'should_stream': True,
387+
'disk_throughput_gbps': 8,
388+
'direct_io': True,
389+
},
390+
True,
391+
8,
392+
True,
393+
),
394+
],
395+
)
396+
def test_fio_options(
397+
self,
398+
fio_options,
399+
should_stream,
400+
disk_throughput,
401+
direct_io,
402+
mock_s3_crt_client,
403+
):
404+
params = {'fio_options': fio_options}
405+
s3transfer.crt.create_s3_crt_client(
406+
'us-west-2',
407+
**params,
408+
)
409+
assert (
410+
mock_s3_crt_client.call_args[1]['fio_options'].should_stream
411+
is should_stream
412+
)
413+
assert (
414+
mock_s3_crt_client.call_args[1]['fio_options'].disk_throughput_gbps
415+
== disk_throughput
416+
)
417+
assert (
418+
mock_s3_crt_client.call_args[1]['fio_options'].direct_io
419+
is direct_io
420+
)

0 commit comments

Comments
 (0)