Skip to content

Commit 6ca2848

Browse files
committed
cephadm: improve hw qat experience with cephadm
Signed-off-by: Kushal Deb <[email protected]>
1 parent fd3d6bc commit 6ca2848

File tree

4 files changed

+100
-0
lines changed

4 files changed

+100
-0
lines changed

doc/cephadm/services/rgw.rst

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,22 @@ The ``wildcard_enabled`` flag ensures that a wildcard SAN entry is included in t
203203
allowing access to buckets in virtual host mode. By default, this flag is disabled.
204204
example: wildcard SAN - (``*.s3.cephlab.com``)
205205

206+
RGW service specs applied through ``ceph orch`` support the following optional ``qat`` configuration:
207+
208+
.. code-block:: yaml
209+
210+
spec:
211+
qat:
212+
compression: hw | sw
213+
214+
compression:
215+
216+
``hw``: Enables hardware QAT offload (if QAT hardware and its virtual functions (VFs) are present on the node)
217+
218+
``sw``: Enables QAT software fallback mode
219+
220+
No other keys are currently supported in the ``qat`` block.
221+
206222
Disabling multisite sync traffic
207223
--------------------------------
208224

src/cephadm/cephadmlib/daemons/ceph.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from ..context import CephadmContext
1717
from ..deployment_utils import to_deployment_container
1818
from ..exceptions import Error
19+
from ..call_wrappers import call_throws
1920
from ..file_utils import (
2021
make_run_dir,
2122
pathify,
@@ -192,10 +193,64 @@ def customize_container_mounts(
192193
)
193194
mounts.update(cm)
194195

196+
def setup_qat_args(self, ctx: CephadmContext, args: List[str]) -> None:
    """Append the container options used for hardware QAT offload.

    Lists ``/dev/vfio/devices`` on the host, then extends ``args`` in
    place with the capability, seccomp, memlock, device and volume
    options below plus one ``--device`` entry per listed VFIO device.
    Finally it (re)writes ``/etc/sysconfig/qat`` on the host, which is
    bind-mounted read-only into the container.

    :param ctx: cephadm context passed through to ``call_throws``.
    :param args: container argument list, mutated in place.

    If the device listing raises ``RuntimeError`` the failure is logged
    and the method returns without touching ``args`` or the host file.
    """
    # Only the listing call is guarded: the handler's message is specific
    # to it, so nothing else should be able to trigger that log line.
    try:
        out, _, _ = call_throws(ctx, ['ls', '-1', '/dev/vfio/devices'])
    except RuntimeError:
        logger.exception('[QAT] Could not list /dev/vfio/devices')
        return

    # One entry per line; drop the empty string left by a trailing newline.
    devices = [d for d in out.split('\n') if d]

    args.extend(
        [
            '--cap-add=SYS_ADMIN',
            '--cap-add=SYS_PTRACE',
            '--cap-add=IPC_LOCK',
            '--security-opt',
            'seccomp=unconfined',
            '--ulimit',
            'memlock=209715200:209715200',
            '--device=/dev/qat_adf_ctl:/dev/qat_adf_ctl',
            '--device=/dev/vfio/vfio:/dev/vfio/vfio',
            '-v',
            '/dev:/dev',
            '--volume=/etc/sysconfig/qat:/etc/sysconfig/qat:ro',
        ]
    )

    # Expose every listed VFIO device node at the same path in the container.
    args.extend(
        f'--device=/dev/vfio/devices/{dev}:/dev/vfio/devices/{dev}'
        for dev in devices
    )

    # Host-side file backing the read-only volume added above.
    os.makedirs('/etc/sysconfig', exist_ok=True)
    with open('/etc/sysconfig/qat', 'w') as f:
        f.write('ServicesEnabled=dc\nPOLICY=8\nQAT_USER=ceph\n')

    logger.info(
        '[QAT] Successfully injected container args for %s',
        self.identity.daemon_name,
    )
233+
195234
def customize_container_args(
    self, ctx: CephadmContext, args: List[str]
) -> None:
    """Add daemon-specific container engine arguments to ``args``.

    Always appends the engine's unlimited-pids option. It then reads the
    optional ``qat`` entry from the fetched config and, for ``rgw``
    daemons whose ``qat.compression`` is ``'hw'``, delegates to
    ``setup_qat_args`` to add the QAT options.

    :param ctx: cephadm context providing the container engine and config.
    :param args: container argument list, mutated in place.
    :raises Error: if the ``qat`` entry is present but is neither ``None``
        nor a dict.
    """
    args.append(ctx.container_engine.unlimited_pids_option)
    config_json = fetch_configs(ctx)
    qat_raw: Any = config_json.get('qat', {})
    if qat_raw is None:
        # An explicit null is treated the same as an absent block.
        qat_config: Dict[str, Any] = {}
    elif isinstance(qat_raw, dict):
        qat_config = qat_raw
    else:
        # Take __name__ from the type, not from the value: values such as
        # str or list have no __name__, which would raise AttributeError
        # and hide the intended Error.
        raise Error(
            f'Invalid qat config: expected dict got {type(qat_raw).__name__}'
        )

    if (
        self.identity.daemon_type == 'rgw'
        and qat_config.get('compression') == 'hw'
    ):
        self.setup_qat_args(ctx, args)
199254

200255
def customize_process_args(
201256
self, ctx: CephadmContext, args: List[str]

src/pybind/mgr/cephadm/services/cephadmservice.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1237,6 +1237,15 @@ def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonD
12371237
'value': 'false' if spec.disable_multisite_sync_traffic else 'true',
12381238
})
12391239

1240+
qat_mode = spec.qat.get('compression') if spec.qat else None
1241+
if qat_mode in ('sw', 'hw'):
1242+
ret, out, err = self.mgr.check_mon_command({
1243+
'prefix': 'config set',
1244+
'who': daemon_name,
1245+
'name': 'qat_compressor_enabled',
1246+
'value': 'true',
1247+
})
1248+
12401249
daemon_spec.keyring = keyring
12411250
daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
12421251

@@ -1273,6 +1282,11 @@ def post_remove(self, daemon: DaemonDescription, is_failed_deploy: bool) -> None
12731282
'who': utils.name_to_config_section(daemon.name()),
12741283
'name': 'rgw_frontends',
12751284
})
1285+
self.mgr.check_mon_command({
1286+
'prefix': 'config rm',
1287+
'who': utils.name_to_config_section(daemon.name()),
1288+
'name': 'qat_compressor_enabled'
1289+
})
12761290
self.mgr.check_mon_command({
12771291
'prefix': 'config-key rm',
12781292
'key': f'rgw/cert/{daemon.name()}',
@@ -1323,6 +1337,9 @@ def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[st
13231337
if hasattr(svc_spec, 'rgw_exit_timeout_secs') and svc_spec.rgw_exit_timeout_secs:
13241338
config['rgw_exit_timeout_secs'] = svc_spec.rgw_exit_timeout_secs
13251339

1340+
if svc_spec.qat:
1341+
config['qat'] = svc_spec.qat
1342+
13261343
rgw_deps = parent_deps + self.get_dependencies(self.mgr, svc_spec)
13271344
return config, rgw_deps
13281345

@@ -1454,6 +1471,7 @@ def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonD
14541471
'ceph-exporter.crt': crt,
14551472
'ceph-exporter.key': key
14561473
}
1474+
14571475
daemon_spec.keyring = keyring
14581476
daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
14591477
daemon_spec.final_config = merge_dicts(daemon_spec.final_config, exporter_config)

src/python-common/ceph/deployment/service_spec.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1238,6 +1238,7 @@ def __init__(self,
12381238
disable_multisite_sync_traffic: Optional[bool] = None,
12391239
wildcard_enabled: Optional[bool] = False,
12401240
rgw_exit_timeout_secs: int = 120,
1241+
qat: Optional[Dict[str, str]] = None,
12411242
):
12421243
assert service_type == 'rgw', service_type
12431244

@@ -1300,6 +1301,8 @@ def __init__(self,
13001301
#: How long the RGW will wait to try and complete client requests when told to shut down
13011302
self.rgw_exit_timeout_secs = rgw_exit_timeout_secs
13021303

1304+
self.qat = qat or {}
1305+
13031306
def get_port_start(self) -> List[int]:
13041307
ports = self.get_port()
13051308
return ports
@@ -1361,6 +1364,14 @@ def validate(self) -> None:
13611364
'ec profile will be generated automatically based on provided attributes'
13621365
)
13631366

1367+
valid_compression_modes = ('sw', 'hw')
1368+
if self.qat:
1369+
compression = self.qat.get('compression')
1370+
if compression and compression not in valid_compression_modes:
1371+
raise SpecValidationError(
1372+
f"Invalid compression mode {compression}. Only 'sw' and 'hw' are allowed"
1373+
)
1374+
13641375

13651376
yaml.add_representer(RGWSpec, ServiceSpec.yaml_representer)
13661377

0 commit comments

Comments
 (0)