Skip to content

Commit 22837e7

Browse files
Merge branch 'dev' into fixing-node-memory-handling
2 parents 75a4647 + f1a97ae commit 22837e7

File tree

8 files changed

+117
-80
lines changed

8 files changed

+117
-80
lines changed

bibigrid/core/actions/create.py

Lines changed: 20 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
import mergedeep
1212
import paramiko
1313
import sympy
14-
import yaml
1514
from werkzeug.utils import secure_filename
1615

1716
from bibigrid.core.actions.terminate import delete_keypairs, delete_local_keypairs, terminate, write_cluster_state
@@ -78,6 +77,9 @@ def __init__(self, *, providers, configurations, config_path, log, debug=False,
7877
self.use_master_with_public_ip = not configurations[0].get("gateway") and configurations[0].get(
7978
"useMasterWithPublicIp", True)
8079
self.log.debug("Keyname: %s", self.key_name)
80+
self.host_vars = {"host_entries": {}}
81+
self.host_vars_lock = threading.Lock()
82+
self.write_remote = []
8183

8284
os.makedirs(os.path.join(CLUSTER_INFO_FOLDER), exist_ok=True)
8385
write_cluster_state(
@@ -273,14 +275,9 @@ def start_worker(self, worker, worker_count, configuration, provider): # pylint
273275
# for DNS resolution an entry in the hosts file is created
274276
with self.worker_thread_lock:
275277
self.permanents.append(name)
276-
with open(a_rp.HOSTS_FILE, mode="r", encoding="UTF-8") as hosts_file:
277-
hosts = yaml.safe_load(hosts_file)
278-
if not hosts or "host_entries" not in hosts:
279-
self.log.warning("Hosts file is broken.")
280-
hosts = {"host_entries": {}}
281-
hosts["host_entries"][name] = server["private_v4"]
282-
ansible_configurator.write_yaml(a_rp.HOSTS_FILE, hosts, self.log)
283-
self.log.debug(f"Added worker {name} to hosts file {a_rp.HOSTS_FILE}.")
278+
with self.host_vars_lock:
279+
self.host_vars["host_entries"][name] = server["private_v4"]
280+
self.log.debug(f"Added worker {name} to host vars.")
284281

285282
# pylint: disable=duplicate-code
286283
def create_server_volumes(self, provider, instance, name):
@@ -359,9 +356,9 @@ def add_volume_device_info_to_instance(self, provider, server, instance):
359356
f"as device {device} that is going to be mounted to "
360357
f"{volume.get('mountPoint')}")
361358

362-
ansible_configurator.write_yaml(os.path.join(a_rp.HOST_VARS_FOLDER, f"{server['name']}.yaml"),
363-
{"volumes": final_volumes},
364-
self.log)
359+
self.write_remote.append(
360+
({"volumes": final_volumes}, os.path.join(a_rp.HOST_VARS_FOLDER_REMOTE, f"{server['name']}.yaml"),
361+
))
365362

366363
def prepare_vpn_or_master_args(self, configuration):
367364
"""
@@ -434,20 +431,20 @@ def upload_data(self, private_key, clean_playbook=False):
434431
@return:
435432
"""
436433
self.log.debug("Running upload_data")
437-
if not os.path.isfile(a_rp.HOSTS_FILE):
438-
with open(a_rp.HOSTS_FILE, 'a', encoding='utf-8') as hosts_file:
439-
hosts_file.write("# placeholder file for worker DNS entries (see 003-dns)")
440-
441-
ansible_configurator.configure_ansible_yaml(providers=self.providers, configurations=self.configurations,
442-
cluster_id=self.cluster_id, log=self.log)
443-
ansible_start = ssh_handler.ANSIBLE_START
444-
ansible_start[-1] = (ansible_start[-1][0].format(",".join(self.permanents)), ansible_start[-1][1])
434+
435+
self.write_remote = (self.write_remote +
436+
ansible_configurator.configure_ansible_yaml(providers=self.providers,
437+
configurations=self.configurations,
438+
cluster_id=self.cluster_id,
439+
log=self.log))
440+
self.write_remote.append((self.host_vars, a_rp.HOSTS_FILE_REMOTE))
441+
ansible_start = ssh_handler.ansible_start(",".join(self.permanents))
445442
self.log.debug(f"Starting playbook with {ansible_start}.")
446443
if self.configurations[0].get("dontUploadCredentials"):
447444
commands = ansible_start
448445
else:
449446
commands = [ssh_handler.get_ac_command(self.providers, AC_NAME.format(
450-
cluster_id=self.cluster_id))] + ssh_handler.ANSIBLE_START
447+
cluster_id=self.cluster_id))] + ansible_start
451448
if clean_playbook:
452449
self.log.info("Cleaning Playbook")
453450
ssh_data = {"floating_ip": self.master_ip, "private_key": private_key, "username": self.ssh_user,
@@ -456,7 +453,7 @@ def upload_data(self, private_key, clean_playbook=False):
456453
ssh_handler.execute_ssh(ssh_data=ssh_data, log=self.log)
457454
self.log.info("Uploading Data")
458455
ssh_data = {"floating_ip": self.master_ip, "private_key": private_key, "username": self.ssh_user,
459-
"commands": commands, "filepaths": UPLOAD_FILEPATHS,
456+
"commands": commands, "filepaths": UPLOAD_FILEPATHS, "write_remote": self.write_remote,
460457
"gateway": self.configurations[0].get("gateway", {}),
461458
"timeout": self.ssh_timeout}
462459
ssh_handler.execute_ssh(ssh_data=ssh_data, log=self.log)
@@ -469,7 +466,6 @@ def start_start_server_threads(self):
469466
self.log.debug("Running start_start_server_threads")
470467
start_server_threads = []
471468
worker_count = 0
472-
ansible_configurator.write_yaml(a_rp.HOSTS_FILE, {"host_entries": {}}, self.log)
473469
for configuration, provider in zip(self.configurations, self.providers):
474470
start_server_thread = return_threading.ReturnThread(target=self.start_vpn_or_master,
475471
args=[configuration, provider])
@@ -491,7 +487,7 @@ def start_start_server_threads(self):
491487
for start_server_thread in start_server_threads:
492488
try:
493489
start_server_thread.join()
494-
except Exception as e: # pylint: disable=broad-except
490+
except Exception as e: # pylint: disable=broad-except
495491
self.log.warning(f"Worker thread {start_server_thread} raised exception {e}.")
496492
worker_exceptions.append(e)
497493
if worker_exceptions:

bibigrid/core/actions/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
LOG = logging.getLogger("bibigrid")
1616

17-
__version__ = "0.5.0"
17+
__version__ = "3.0.1-hotfix"
1818
RELEASE_DATE = "2025"
1919
GIT_HUB = "https://github.com/BiBiServ/bibigrid"
2020

bibigrid/core/utility/ansible_commands.py

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,6 @@
22
Module containing a bunch of useful commands to be used by sshHandler.py for cluster setup
33
"""
44

5-
import os
6-
import bibigrid.core.utility.paths.ansible_resources_path as a_rp
7-
85
# TO_LOG = "| sudo tee -a /var/log/ansible.log"
96
# AIY = "apt-get -y install"
107
# SAU = "sudo apt-get update"
@@ -49,19 +46,16 @@
4946
"Adjust playbook home permission.")
5047
MV_ANSIBLE_CONFIG = (
5148
"sudo install -D /opt/playbook/ansible.cfg /etc/ansible/ansible.cfg", "Move ansible configuration.")
52-
EXECUTE = (f"/opt/bibigrid-venv/bin/ansible-playbook {os.path.join(a_rp.PLAYBOOK_PATH_REMOTE, a_rp.SITE_YAML)} -i "
53-
f"{os.path.join(a_rp.PLAYBOOK_PATH_REMOTE, a_rp.ANSIBLE_HOSTS)} -l {{}}",
54-
"Execute ansible playbook. Be patient.")
5549

5650
# ansible setup
5751
WAIT_FOR_SERVICES = (
5852
"while [[ $(systemctl is-active {service}) == 'active' ]]; do echo 'Waiting for service {service}'; sleep 2; done",
5953
"Waiting for service {service}.")
6054
UPDATE = ("sudo apt-get update", "Update apt repository lists.")
6155
PYTHON3_PIP = ("sudo apt-get install -y python3-pip python3-venv", "Install python3 pip and venv using apt.")
62-
VENV_SETUP = ("sudo python3 -m venv /opt/bibigrid-venv"," Create bibigrid virtual environment.")
56+
VENV_SETUP = ("sudo python3 -m venv /opt/bibigrid-venv", " Create bibigrid virtual environment.")
6357
ANSIBLE_PASSLIB = ("sudo /opt/bibigrid-venv/bin/pip install ansible==10.7 passlib",
6458
"Install Ansible 10.7 and Passlib using pip.")
65-
ANSIBLE_GALAXY = ("sudo /opt/bibigrid-venv/bin/ansible-galaxy collection install "+
59+
ANSIBLE_GALAXY = ("sudo /opt/bibigrid-venv/bin/ansible-galaxy collection install " +
6660
"-p /usr/share/ansible/collections community.zabbix==3.2.0",
6761
"Install necessary ansible-galaxy modules.")

bibigrid/core/utility/ansible_configurator.py

Lines changed: 44 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,8 @@ def generate_site_file_yaml(user_roles):
5656
return site_yaml
5757

5858

59-
def write_worker_host_vars(*, cluster_id, worker, worker_count, log):
59+
def get_worker_host_vars(*, cluster_id, worker, worker_count):
60+
write_host_vars_remote = []
6061
for worker_number in range(worker.get('count', 1)):
6162
name = WORKER_IDENTIFIER(cluster_id=cluster_id, additional=worker_count + worker_number)
6263
write_volumes = []
@@ -73,12 +74,14 @@ def write_worker_host_vars(*, cluster_id, worker, worker_count, log):
7374
else:
7475
volume_name = volume["name"]
7576
write_volumes.append({**volume, "name": volume_name})
76-
write_yaml(os.path.join(aRP.HOST_VARS_FOLDER, f"{name}.yaml"),
77-
{"volumes": write_volumes},
78-
log)
77+
write_host_vars_remote.append(
78+
({"volumes": write_volumes}, os.path.join(aRP.HOST_VARS_FOLDER_REMOTE, f"{name}.yaml")))
79+
return write_host_vars_remote
7980

8081

81-
def write_worker_vars(*, provider, configuration, cluster_id, worker, worker_count, log):
82+
def get_worker_vars(*, provider, configuration, cluster_id, worker,
83+
worker_count): # pylint: disable-msg=too-many-locals
84+
write_worker_vars_remote = []
8285
flavor_dict = provider.create_flavor_dict(flavor=worker["type"])
8386
name = WORKER_IDENTIFIER(cluster_id=cluster_id,
8487
additional=f"[{worker_count}-{worker_count + worker.get('count', 1) - 1}]")
@@ -91,7 +94,8 @@ def write_worker_vars(*, provider, configuration, cluster_id, worker, worker_cou
9194
"network": configuration["network"], "flavor": flavor_dict,
9295
"gateway_ip": configuration["private_v4"],
9396
"cloud_identifier": configuration["cloud_identifier"],
94-
"on_demand": worker.get("onDemand", True), "state": "CLOUD",
97+
"on_demand": worker.get("onDemand", True),
98+
"state": "CLOUD",
9599
"partitions": partitions,
96100
"boot_volume": worker.get("bootVolume", configuration.get("bootVolume", {})),
97101
"meta": mergedeep.merge({}, worker.get("meta", {}), configuration.get("meta", {})),
@@ -107,15 +111,15 @@ def write_worker_vars(*, provider, configuration, cluster_id, worker, worker_cou
107111
worker_dict["features"] = features
108112

109113
pass_through(configuration, worker_dict, "waitForServices", "wait_for_services")
110-
write_yaml(os.path.join(aRP.GROUP_VARS_FOLDER, f"{group_name}.yaml"), worker_dict, log)
114+
write_worker_vars_remote.append((worker_dict, os.path.join(aRP.GROUP_VARS_FOLDER_REMOTE, f"{group_name}.yaml")))
111115
if worker_dict["on_demand"]: # not on demand instances host_vars are created in create
112-
write_worker_host_vars(cluster_id=cluster_id, worker=worker, worker_count=worker_count,
113-
log=log)
116+
write_worker_vars_remote = write_worker_vars_remote + get_worker_host_vars(cluster_id=cluster_id, worker=worker,
117+
worker_count=worker_count)
114118
worker_count += worker.get('count', 1)
115-
return worker_count
119+
return worker_count, write_worker_vars_remote
116120

117121

118-
def write_vpn_var(*, provider, configuration, cluster_id, vpngtw, vpn_count, log):
122+
def get_vpn_var(*, provider, configuration, cluster_id, vpngtw, vpn_count):
119123
name = VPNGTW_IDENTIFIER(cluster_id=cluster_id, additional=f"{vpn_count}")
120124
wireguard_ip = f"10.0.0.{vpn_count + 2}" # skipping 0 and 1 (master)
121125
vpn_count += 1
@@ -131,10 +135,10 @@ def write_vpn_var(*, provider, configuration, cluster_id, vpngtw, vpn_count, log
131135
if configuration.get("wireguard_peer"):
132136
vpngtw_dict["wireguard"] = {"ip": wireguard_ip, "peer": configuration.get("wireguard_peer")}
133137
pass_through(configuration, vpngtw_dict, "waitForServices", "wait_for_services")
134-
write_yaml(os.path.join(aRP.HOST_VARS_FOLDER, f"{name}.yaml"), vpngtw_dict, log)
138+
return vpngtw_dict, os.path.join(aRP.HOST_VARS_FOLDER_REMOTE, f"{name}.yaml")
135139

136140

137-
def write_master_var(provider, configuration, cluster_id, log):
141+
def get_master_var(provider, configuration, cluster_id):
138142
master = configuration["masterInstance"]
139143
name = MASTER_IDENTIFIER(cluster_id=cluster_id)
140144
flavor_dict = provider.create_flavor_dict(flavor=master["type"])
@@ -152,10 +156,10 @@ def write_master_var(provider, configuration, cluster_id, log):
152156
if configuration.get("wireguard_peer"):
153157
master_dict["wireguard"] = {"ip": "10.0.0.1", "peer": configuration.get("wireguard_peer")}
154158
pass_through(configuration, master_dict, "waitForServices", "wait_for_services")
155-
write_yaml(os.path.join(aRP.GROUP_VARS_FOLDER, "master.yaml"), master_dict, log)
159+
return master_dict, os.path.join(aRP.GROUP_VARS_FOLDER_REMOTE, "master.yaml")
156160

157161

158-
def write_host_and_group_vars(configurations, providers, cluster_id, log):
162+
def get_host_and_group_vars(configurations, providers, cluster_id, log):
159163
"""
160164
Filters unnecessary information
161165
@param log:
@@ -167,17 +171,21 @@ def write_host_and_group_vars(configurations, providers, cluster_id, log):
167171
log.info("Generating instances file...")
168172
worker_count = 0
169173
vpn_count = 0
174+
write_remote = []
170175
for configuration, provider in zip(configurations, providers): # pylint: disable=too-many-nested-blocks
171176
for worker in configuration.get("workerInstances", []):
172-
worker_count = write_worker_vars(provider=provider, configuration=configuration, cluster_id=cluster_id,
173-
worker=worker, worker_count=worker_count, log=log)
174-
177+
worker_count, write_worker_vars_remote = get_worker_vars(provider=provider, configuration=configuration,
178+
cluster_id=cluster_id,
179+
worker=worker, worker_count=worker_count)
180+
write_remote = write_remote + write_worker_vars_remote
175181
vpngtw = configuration.get("vpnInstance")
176182
if vpngtw:
177-
write_vpn_var(provider=provider, configuration=configuration, cluster_id=cluster_id, vpngtw=vpngtw,
178-
vpn_count=vpn_count, log=log)
183+
write_remote.append(
184+
get_vpn_var(provider=provider, configuration=configuration, cluster_id=cluster_id, vpngtw=vpngtw,
185+
vpn_count=vpn_count))
179186
else:
180-
write_master_var(provider, configuration, cluster_id, log)
187+
write_remote.append(get_master_var(provider, configuration, cluster_id))
188+
return write_remote
181189

182190

183191
def pass_through(dict_from, dict_to, key_from, key_to=None):
@@ -361,6 +369,8 @@ def write_yaml(path, generated_yaml, log, alias=False):
361369
@param log:
362370
@param alias:
363371
@return:
372+
TODO: Move this method to a different file; it is no longer used in this module, only in other places
373+
that are unrelated to ansible_configurator.
364374
"""
365375
log.debug("Writing yaml %s", path)
366376

@@ -398,21 +408,19 @@ def configure_ansible_yaml(providers, configurations, cluster_id, log):
398408
@return:
399409
"""
400410
log.info("Writing ansible files...")
401-
alias = configurations[0].get("aliasDumper", False)
402411
user_roles = configurations[0].get("userRoles", [])
403412
default_user = providers[0].cloud_specification["auth"].get("username", configurations[0].get("sshUser", "Ubuntu"))
404413
add_wireguard_peers(configurations)
405-
for path, generated_yaml in [
406-
(aRP.WORKER_SPECIFICATION_FILE, generate_worker_specification_file_yaml(configurations, log)), (
407-
aRP.COMMONS_CONFIG_FILE,
408-
generate_common_configuration_yaml(cidrs=get_cidrs(configurations), configurations=configurations,
409-
cluster_id=cluster_id, ssh_user=configurations[0]["sshUser"],
410-
default_user=default_user, log=log)), (aRP.HOSTS_CONFIG_FILE,
411-
generate_ansible_hosts_yaml(
412-
configurations[0][
413-
"sshUser"],
414-
configurations,
415-
cluster_id, log)),
416-
(aRP.SITE_CONFIG_FILE, generate_site_file_yaml(user_roles))]:
417-
write_yaml(path, generated_yaml, log, alias)
418-
write_host_and_group_vars(configurations, providers, cluster_id, log) # writing included in method
414+
write_remote = []
415+
for write_remote_tuple in [
416+
(generate_worker_specification_file_yaml(configurations, log), aRP.WORKER_SPECIFICATION_FILE_REMOTE),
417+
(generate_common_configuration_yaml(cidrs=get_cidrs(configurations), configurations=configurations,
418+
cluster_id=cluster_id, ssh_user=configurations[0]["sshUser"],
419+
default_user=default_user, log=log), aRP.COMMONS_CONFIG_FILE_REMOTE),
420+
(generate_ansible_hosts_yaml(configurations[0]["sshUser"], configurations, cluster_id, log),
421+
aRP.HOSTS_CONFIG_FILE_REMOTE),
422+
(generate_site_file_yaml(user_roles), aRP.SITE_CONFIG_FILE_REMOTE)]:
423+
write_remote.append(write_remote_tuple)
424+
write_remote = write_remote + get_host_and_group_vars(configurations, providers, cluster_id,
425+
log)
426+
return write_remote

0 commit comments

Comments
 (0)