Skip to content

Commit d425653

Browse files
authored
Merge pull request ceph#63895 from Kushal-deb/rgw-qat-compression
cephadm: improve hw qat experience with cephadm Reviewed-by: Adam King <[email protected]>
2 parents 111bb61 + 6ca2848 commit d425653

File tree

4 files changed

+100
-0
lines changed

4 files changed

+100
-0
lines changed

doc/cephadm/services/rgw.rst

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,22 @@ The ``wildcard_enabled`` flag ensures that a wildcard SAN entry is included in t
203203
allowing access to buckets in virtual host mode. By default, this flag is disabled.
204204
example: wildcard SAN - (``*.s3.cephlab.com``)
205205

206+
Cephadm ``ceph orch`` specs for RGW services now support the following optional configuration:
207+
208+
.. code-block:: yaml
209+
210+
spec:
211+
qat:
212+
compression: hw | sw
213+
214+
compression:
215+
216+
``hw``: Enables hardware QAT offload, provided QAT hardware and its virtual functions (VFs) are present on the node
217+
218+
``sw``: Enables QAT software fallback mode
219+
220+
No other keys are currently supported in the ``qat`` block.
221+
206222
Disabling multisite sync traffic
207223
--------------------------------
208224

src/cephadm/cephadmlib/daemons/ceph.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from ..context import CephadmContext
1717
from ..deployment_utils import to_deployment_container
1818
from ..exceptions import Error
19+
from ..call_wrappers import call_throws
1920
from ..file_utils import (
2021
make_run_dir,
2122
pathify,
@@ -196,10 +197,64 @@ def customize_container_mounts(
196197
)
197198
mounts.update(cm)
198199

200+
def setup_qat_args(self, ctx: CephadmContext, args: List[str]) -> None:
    """Extend *args* in place with the container options used for QAT.

    Lists ``/dev/vfio/devices`` via ``call_throws``, appends the
    capability, security, ulimit, device and volume options to *args*
    (plus one ``--device`` option per listed entry), and writes
    ``/etc/sysconfig/qat``.

    If listing the VFIO devices raises ``RuntimeError`` the failure is
    logged and the method returns without touching *args* or the
    filesystem, so the daemon is still deployed, just without the QAT
    options.

    :param ctx: cephadm context handed to ``call_throws``.
    :param args: container engine argument list, mutated in place.
    """
    # Only the device listing is guarded: keeping the try body minimal
    # ensures the "could not list" message is never logged for an
    # unrelated failure further down.
    try:
        out, _, _ = call_throws(ctx, ['ls', '-1', '/dev/vfio/devices'])
    except RuntimeError:
        logger.exception('[QAT] Could not list /dev/vfio/devices')
        return

    # `ls -1` prints one entry per line; drop the empty trailing item.
    devices = [d for d in out.split('\n') if d]

    args.extend(
        [
            '--cap-add=SYS_ADMIN',
            '--cap-add=SYS_PTRACE',
            '--cap-add=IPC_LOCK',
            '--security-opt',
            'seccomp=unconfined',
            '--ulimit',
            'memlock=209715200:209715200',
            '--device=/dev/qat_adf_ctl:/dev/qat_adf_ctl',
            '--device=/dev/vfio/vfio:/dev/vfio/vfio',
            '-v',
            '/dev:/dev',
            '--volume=/etc/sysconfig/qat:/etc/sysconfig/qat:ro',
        ]
    )
    args.extend(
        f'--device=/dev/vfio/devices/{dev}:/dev/vfio/devices/{dev}'
        for dev in devices
    )

    # This file is the source of the read-only /etc/sysconfig/qat volume
    # added above, so make sure it exists with the expected settings.
    os.makedirs('/etc/sysconfig', exist_ok=True)
    with open('/etc/sysconfig/qat', 'w') as f:
        f.write('ServicesEnabled=dc\nPOLICY=8\nQAT_USER=ceph\n')

    logger.info(
        '[QAT] Successfully injected container args for %s',
        self.identity.daemon_name,
    )
237+
199238
def customize_container_args(
    self, ctx: CephadmContext, args: List[str]
) -> None:
    """Append daemon-specific container engine arguments to *args*.

    Always adds the engine's unlimited-pids option. For ``rgw`` daemons
    whose fetched config contains ``qat: {compression: hw}``, the QAT
    options are added as well via ``setup_qat_args``.

    :param ctx: cephadm context; provides the container engine and is
        passed to ``fetch_configs``.
    :param args: container engine argument list, mutated in place.
    :raises Error: if the ``qat`` config entry is present but is neither
        ``None`` nor a dict.
    """
    args.append(ctx.container_engine.unlimited_pids_option)
    config_json = fetch_configs(ctx)
    qat_raw: Any = config_json.get('qat', {})
    if qat_raw is None:
        qat_config: Dict[str, Any] = {}
    elif isinstance(qat_raw, dict):
        qat_config = qat_raw
    else:
        # Take the name of the value's *type*. Reading ``__name__`` from
        # the value itself raises AttributeError for str/list/int and
        # would hide this validation error.
        raise Error(
            f'Invalid qat config: expected dict got {type(qat_raw).__name__}'
        )

    if (
        self.identity.daemon_type == 'rgw'
        and qat_config.get('compression') == 'hw'
    ):
        self.setup_qat_args(ctx, args)
203258

204259
def customize_process_args(
205260
self, ctx: CephadmContext, args: List[str]

src/pybind/mgr/cephadm/services/cephadmservice.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1377,6 +1377,15 @@ def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonD
13771377
'value': 'false' if spec.disable_multisite_sync_traffic else 'true',
13781378
})
13791379

1380+
qat_mode = spec.qat.get('compression') if spec.qat else None
1381+
if qat_mode in ('sw', 'hw'):
1382+
ret, out, err = self.mgr.check_mon_command({
1383+
'prefix': 'config set',
1384+
'who': daemon_name,
1385+
'name': 'qat_compressor_enabled',
1386+
'value': 'true',
1387+
})
1388+
13801389
daemon_spec.keyring = keyring
13811390
daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
13821391

@@ -1413,6 +1422,11 @@ def post_remove(self, daemon: DaemonDescription, is_failed_deploy: bool) -> None
14131422
'who': utils.name_to_config_section(daemon.name()),
14141423
'name': 'rgw_frontends',
14151424
})
1425+
self.mgr.check_mon_command({
1426+
'prefix': 'config rm',
1427+
'who': utils.name_to_config_section(daemon.name()),
1428+
'name': 'qat_compressor_enabled'
1429+
})
14161430
self.mgr.check_mon_command({
14171431
'prefix': 'config-key rm',
14181432
'key': f'rgw/cert/{daemon.name()}',
@@ -1463,6 +1477,9 @@ def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[st
14631477
if hasattr(svc_spec, 'rgw_exit_timeout_secs') and svc_spec.rgw_exit_timeout_secs:
14641478
config['rgw_exit_timeout_secs'] = svc_spec.rgw_exit_timeout_secs
14651479

1480+
if svc_spec.qat:
1481+
config['qat'] = svc_spec.qat
1482+
14661483
rgw_deps = parent_deps + self.get_dependencies(self.mgr, svc_spec)
14671484
return config, rgw_deps
14681485

@@ -1599,6 +1616,7 @@ def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonD
15991616
'ceph-exporter.crt': crt,
16001617
'ceph-exporter.key': key
16011618
}
1619+
16021620
daemon_spec.keyring = keyring
16031621
daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
16041622
daemon_spec.final_config = merge_dicts(daemon_spec.final_config, exporter_config)

src/python-common/ceph/deployment/service_spec.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1412,6 +1412,7 @@ def __init__(self,
14121412
disable_multisite_sync_traffic: Optional[bool] = None,
14131413
wildcard_enabled: Optional[bool] = False,
14141414
rgw_exit_timeout_secs: int = 120,
1415+
qat: Optional[Dict[str, str]] = None,
14151416
):
14161417
assert service_type == 'rgw', service_type
14171418

@@ -1479,6 +1480,8 @@ def __init__(self,
14791480
#: How long the RGW will wait to try and complete client requests when told to shut down
14801481
self.rgw_exit_timeout_secs = rgw_exit_timeout_secs
14811482

1483+
self.qat = qat or {}
1484+
14821485
def get_port_start(self) -> List[int]:
14831486
ports = self.get_port()
14841487
return ports
@@ -1549,6 +1552,14 @@ def validate(self) -> None:
15491552
'ec profile will be generated automatically based on provided attributes'
15501553
)
15511554

1555+
valid_compression_modes = ('sw', 'hw')
1556+
if self.qat:
1557+
compression = self.qat.get('compression')
1558+
if compression and compression not in valid_compression_modes:
1559+
raise SpecValidationError(
1560+
f"Invalid compression mode {compression}. Only 'sw' and 'hw' are allowed"
1561+
)
1562+
15521563

15531564
yaml.add_representer(RGWSpec, ServiceSpec.yaml_representer)
15541565

0 commit comments

Comments
 (0)