Skip to content

Commit a76fd61

Browse files
authored
Merge pull request ceph#62927 from ShwetaBhosale1/fix_issue_71031_add_Monitoring_Addr_param_to_ganesha.conf
mgr/nfs: Add Monitoring_Addr parameter to ganesha.conf and provide option for user to provide ips for bind_addr Reviewed-by: Adam King <[email protected]> Reviewed-by: Anthony D'Atri <[email protected]>
2 parents 062f85c + b862e39 commit a76fd61

File tree

10 files changed

+344
-18
lines changed

10 files changed

+344
-18
lines changed

doc/cephadm/services/nfs.rst

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,11 +47,37 @@ Alternatively, an NFS service can be applied using a YAML specification.
4747
hosts:
4848
- host1
4949
- host2
50+
networks:
51+
- 1.2.3.4/24
52+
ip_addrs:
53+
host1: 10.0.0.100
54+
host2: 10.0.0.101
5055
spec:
5156
port: 12345
57+
monitoring_port: 567
58+
monitoring_ip_addrs:
59+
host1: 10.0.0.123
60+
host2: 10.0.0.124
61+
monitoring_networks:
62+
- 192.168.124.0/24
63+
5264
5365
In this example, we run the server on the non-default ``port`` of
5466
12345 (instead of the default 2049) on ``host1`` and ``host2``.
67+
You can bind the NFS data port to a specific IP address using either the
68+
``ip_addrs`` or ``networks`` section. If ``ip_addrs`` is provided and
69+
the specified IP is assigned to the host, that IP will be used. If the
70+
IP is not present but ``networks`` is specified, an IP matching one of
71+
the given networks will be selected. If neither condition is met, the
72+
daemon will not start on that node.
73+
The default NFS monitoring port can be customized using the ``monitoring_port``
74+
parameter. Additionally, you can specify the ``monitoring_ip_addrs`` or
75+
``monitoring_networks`` parameters to bind the monitoring port to a specific
76+
IP address or network. If ``monitoring_ip_addrs`` is provided and the specified
77+
IP address is assigned to the host, that IP address will be used. If the IP
78+
address is not present and ``monitoring_networks`` is specified, an IP address
79+
that matches one of the specified networks will be used. If neither condition
80+
is met, the default binding will happen on all available network interfaces.
5581

5682
The specification can then be applied by running the following command:
5783

src/pybind/mgr/cephadm/inventory.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1597,6 +1597,14 @@ def get_scheduled_daemon_action(self, host: str, daemon: str) -> Optional[str]:
15971597

15981598
return self.scheduled_daemon_actions.get(host, {}).get(daemon)
15991599

1600+
def get_host_network_ips(self, host: str) -> List[str]:
1601+
return [
1602+
ip
1603+
for net_details in self.networks.get(host, {}).values()
1604+
for ips in net_details.values()
1605+
for ip in ips
1606+
]
1607+
16001608

16011609
class NodeProxyCache:
16021610
def __init__(self, mgr: 'CephadmOrchestrator') -> None:

src/pybind/mgr/cephadm/module.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1068,8 +1068,14 @@ def update_failed_daemon_health_check(self) -> None:
10681068
self.set_health_warning('CEPHADM_FAILED_DAEMON', f'{len(failed_daemons)} failed cephadm daemon(s)', len(
10691069
failed_daemons), failed_daemons)
10701070

1071-
def get_first_matching_network_ip(self, host: str, sspec: ServiceSpec) -> Optional[str]:
1072-
sspec_networks = sspec.networks
1071+
def get_first_matching_network_ip(
1072+
self,
1073+
host: str,
1074+
sspec: ServiceSpec,
1075+
sspec_networks: Optional[List[str]] = None
1076+
) -> Optional[str]:
1077+
if not sspec_networks:
1078+
sspec_networks = sspec.networks
10731079
for subnet, ifaces in self.cache.networks.get(host, {}).items():
10741080
host_network = ipaddress.ip_network(subnet)
10751081
for spec_network_str in sspec_networks:

src/pybind/mgr/cephadm/schedule.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -467,18 +467,34 @@ def get_candidates(self) -> List[DaemonPlacement]:
467467
"placement spec is empty: no hosts, no label, no pattern, no count")
468468

469469
# allocate an IP?
470-
if self.spec.networks:
470+
if self.spec.networks or self.spec.ip_addrs:
471471
orig = ls.copy()
472472
ls = []
473473
for p in orig:
474-
ip = self.find_ip_on_host(p.hostname, self.spec.networks)
474+
ip = None
475+
# daemon can have specific ip if 'ip_addrs' is specified in spec, we can use this
476+
# parameter for all services, if they need to bind to specific ip
477+
# If ip not present and networks is passed, ip of that network will be used
478+
if self.spec.ip_addrs:
479+
ip = self.spec.ip_addrs.get(p.hostname)
480+
host_ips: List[str] = []
481+
for net_details in self.networks.get(p.hostname, {}).values():
482+
for ips in net_details.values():
483+
host_ips.extend(ips)
484+
if ip and ip not in host_ips:
485+
logger.debug(f"IP {ip} is not configured on host {p.hostname}.")
486+
ip = None
487+
if not ip and self.spec.networks:
488+
ip = self.find_ip_on_host(p.hostname, self.spec.networks)
475489
if ip:
476490
ls.append(DaemonPlacement(daemon_type=self.primary_daemon_type,
477491
hostname=p.hostname, network=p.network,
478492
name=p.name, ports=p.ports, ip=ip))
479493
else:
480494
logger.debug(
481-
f'Skipping {p.hostname} with no IP in network(s) {self.spec.networks}'
495+
f"Skipping {p.hostname} with no IP in provided networks or ip_addrs "
496+
f"{f'networks: {self.spec.networks}' if self.spec.networks else ''}"
497+
f"{f'ip_addrs: {self.spec.ip_addrs}' if self.spec.ip_addrs else ''}"
482498
)
483499

484500
if self.filter_new_host:

src/pybind/mgr/cephadm/service_discovery.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ class Server: # type: ignore
2121
from cephadm.services.ingress import IngressSpec
2222
from cephadm.services.cephadmservice import CephExporterService
2323
from cephadm.services.nvmeof import NvmeofService
24+
from cephadm.services.service_registry import service_registry
2425

2526
from ceph.deployment.service_spec import SMBSpec
2627

@@ -265,8 +266,10 @@ def nfs_sd_config(self) -> List[Dict[str, Collection[str]]]:
265266
srv_entries = []
266267
for dd in self.mgr.cache.get_daemons_by_type('nfs'):
267268
assert dd.hostname is not None
268-
addr = dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname)
269-
port = NFSService.DEFAULT_EXPORTER_PORT
269+
nfs = cast(NFSService, service_registry.get_service('nfs'))
270+
monitoring_ip, monitoring_port = nfs.get_monitoring_details(dd.service_name(), dd.hostname)
271+
addr = monitoring_ip or dd.ip or self.mgr.inventory.get_addr(dd.hostname)
272+
port = monitoring_port or NFSService.DEFAULT_EXPORTER_PORT
270273
srv_entries.append({
271274
'targets': [build_url(host=addr, port=port).lstrip('/')],
272275
'labels': {'instance': dd.hostname}

src/pybind/mgr/cephadm/services/nfs.py

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -98,20 +98,29 @@ def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[st
9898
self.create_rados_config_obj(spec)
9999

100100
port = daemon_spec.ports[0] if daemon_spec.ports else 2049
101+
monitoring_ip, monitoring_port = self.get_monitoring_details(daemon_spec.service_name, host)
101102

102103
# create the RGW keyring
103104
rgw_user = f'{rados_user}-rgw'
104105
rgw_keyring = self.create_rgw_keyring(daemon_spec)
106+
bind_addr = ''
105107
if spec.virtual_ip and not spec.enable_haproxy_protocol:
106108
bind_addr = spec.virtual_ip
107109
daemon_spec.port_ips = {str(port): spec.virtual_ip}
108-
else:
109-
bind_addr = daemon_spec.ip if daemon_spec.ip else ''
110+
# update daemon spec ip for prometheus, as monitoring will happen on this
111+
# ip, if no monitor ip specified
112+
daemon_spec.ip = bind_addr
113+
elif daemon_spec.ip:
114+
bind_addr = daemon_spec.ip
115+
daemon_spec.port_ips = {str(port): daemon_spec.ip}
110116
if not bind_addr:
111117
logger.warning(f'Bind address in {daemon_type}.{daemon_id}\'s ganesha conf is defaulting to empty')
112118
else:
113119
logger.debug("using haproxy bind address: %r", bind_addr)
114120

121+
if monitoring_ip:
122+
daemon_spec.port_ips.update({str(monitoring_port): monitoring_ip})
123+
115124
# generate the ganesha config
116125
def get_ganesha_conf() -> str:
117126
context: Dict[str, Any] = {
@@ -123,7 +132,8 @@ def get_ganesha_conf() -> str:
123132
"url": f'rados://{POOL_NAME}/{spec.service_id}/{spec.rados_config_name()}',
124133
# fall back to default NFS port if not present in daemon_spec
125134
"port": port,
126-
"monitoring_port": spec.monitoring_port if spec.monitoring_port else 9587,
135+
"monitoring_addr": monitoring_ip,
136+
"monitoring_port": monitoring_port,
127137
"bind_addr": bind_addr,
128138
"haproxy_hosts": [],
129139
"nfs_idmap_conf": nfs_idmap_conf,
@@ -372,3 +382,18 @@ def _haproxy_hosts(self) -> List[str]:
372382
# one address per interface/subnet is enough
373383
cluster_ips.append(addrs[0])
374384
return cluster_ips
385+
386+
def get_monitoring_details(self, service_name: str, host: str) -> Tuple[Optional[str], Optional[int]]:
387+
spec = cast(NFSServiceSpec, self.mgr.spec_store[service_name].spec)
388+
monitoring_port = spec.monitoring_port if spec.monitoring_port else 9587
389+
390+
# check whether the monitoring endpoint needs to be bound to a specific IP
391+
monitoring_addr = spec.monitoring_ip_addrs.get(host) if spec.monitoring_ip_addrs else None
392+
if monitoring_addr and monitoring_addr not in self.mgr.cache.get_host_network_ips(host):
393+
logger.debug(f"Monitoring IP {monitoring_addr} is not configured on host {host}.")
394+
monitoring_addr = None
395+
if not monitoring_addr and spec.monitoring_networks:
396+
monitoring_addr = self.mgr.get_first_matching_network_ip(host, spec, spec.monitoring_networks)
397+
if not monitoring_addr:
398+
logger.debug(f"No IP address found in the network {spec.monitoring_networks} on host {host}.")
399+
return monitoring_addr, monitoring_port

src/pybind/mgr/cephadm/templates/services/nfs/ganesha.conf.j2

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ NFS_CORE_PARAM {
1212
{% endif %}
1313
{% if haproxy_hosts %}
1414
HAProxy_Hosts = {{ haproxy_hosts|join(", ") }};
15+
{% endif %}
16+
{% if monitoring_addr %}
17+
Monitoring_Addr = {{ monitoring_addr }};
1518
{% endif %}
1619
Monitoring_Port = {{ monitoring_port }};
1720
}

src/pybind/mgr/cephadm/tests/test_service_discovery.py

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from unittest.mock import MagicMock
22
from cephadm.service_discovery import Root
3+
from cephadm.services.service_registry import service_registry
34

45

56
class FakeDaemonDescription:
@@ -39,15 +40,31 @@ def get_daemons_by_service(self, service_type):
3940
FakeDaemonDescription('1.2.3.5', [9200], 'node1')]
4041

4142
def get_daemons_by_type(self, daemon_type):
42-
return [FakeDaemonDescription('1.2.3.4', [9100], 'node0', 'ingress', 'haproxy'),
43-
FakeDaemonDescription('1.2.3.5', [9200], 'node1', 'ingress', 'haproxy')]
43+
if daemon_type == 'ingress':
44+
return [FakeDaemonDescription('1.2.3.4', [9100], 'node0', 'ingress', 'haproxy'),
45+
FakeDaemonDescription('1.2.3.5', [9200], 'node1', 'ingress', 'haproxy')]
46+
else:
47+
return [FakeDaemonDescription('1.2.3.4', [1234], 'node0', daemon_type, daemon_type),
48+
FakeDaemonDescription('1.2.3.5', [1234], 'node1', daemon_type, daemon_type)]
4449

4550

4651
class FakeInventory:
4752
def get_addr(self, name: str):
4853
return '1.2.3.4'
4954

5055

56+
class FakeNFSServiceSpec:
57+
def __init__(self, port):
58+
self.monitoring_port = None
59+
self.monitoring_ip_addrs = None
60+
self.monitoring_networks = None
61+
62+
63+
class FakeIngressServiceSpec:
64+
def __init__(self, port):
65+
self.monitor_port = port
66+
67+
5168
class FakeServiceSpec:
5269
def __init__(self, port):
5370
self.monitor_port = port
@@ -58,20 +75,25 @@ def metrics_exporter_port(self):
5875

5976

6077
class FakeSpecDescription:
61-
def __init__(self, port):
62-
self.spec = FakeServiceSpec(port)
78+
def __init__(self, service, port):
79+
if service == 'ingress':
80+
self.spec = FakeIngressServiceSpec(port)
81+
elif service == 'nfs':
82+
self.spec = FakeNFSServiceSpec(port)
83+
else:
84+
self.spec = FakeServiceSpec(port)
6385

6486

6587
class FakeSpecStore():
6688
def __init__(self, mgr):
6789
self.mgr = mgr
68-
self._specs = {'ingress': FakeSpecDescription(9049)}
90+
self._specs = {'ingress': FakeSpecDescription('ingress', 9049), 'nfs': FakeSpecDescription('nfs', 9587)}
6991

7092
def __contains__(self, name):
7193
return name in self._specs
7294

7395
def __getitem__(self, name):
74-
return self._specs['ingress']
96+
return self._specs[name]
7597

7698

7799
class FakeMgr:
@@ -84,6 +106,7 @@ def __init__(self):
84106
self.inventory = FakeInventory()
85107
self.cache = FakeCache()
86108
self.spec_store = FakeSpecStore(self)
109+
service_registry.init_services(self)
87110

88111
def get_mgr_id(self):
89112
return 'mgr-1'

0 commit comments

Comments
 (0)