Skip to content

Commit fdeafde

Browse files
Merge branch 'dev' into rest-simplevm
Conflicts: bibigrid/core/rest/models.py, bibigrid/core/startup_rest.py
2 parents df1bacb + 25aa555 commit fdeafde

32 files changed

+353
-249
lines changed

bibigrid.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
# For an easy introduction see https://github.com/deNBI/bibigrid_clum
2-
# For more detailed information see https://github.com/BiBiServ/bibigrid/blob/master/documentation/markdown/features/configuration.md
1+
# For an easy introduction see https://github.com/deNBI/bibigrid_clum
2+
# For more detailed information see https://github.com/BiBiServ/bibigrid/blob/master/documentation/markdown/features/configuration.md
33

44
- # -- BEGIN: GENERAL CLUSTER INFORMATION --
55
# The following options configure cluster wide keys
@@ -123,4 +123,4 @@
123123
# terminate: True # whether the volume is terminated on server termination
124124
# size: 50
125125

126-
#- [next configurations]
126+
# - [next configurations]

bibigrid/core/actions/create.py

Lines changed: 38 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
import subprocess
88
import threading
99
import traceback
10-
from functools import partial
1110

1211
import paramiko
1312
import sympy
@@ -19,46 +18,14 @@
1918
from bibigrid.core.utility import image_selection
2019
from bibigrid.core.utility.handler import ssh_handler
2120
from bibigrid.core.utility.paths import ansible_resources_path as a_rp
22-
from bibigrid.core.utility.paths import bin_path
21+
from bibigrid.core.utility.paths.basic_path import CLUSTER_INFO_FOLDER, KEY_FOLDER, CLUSTER_MEMORY_PATH
22+
from bibigrid.core.utility.statics.create_statics import AC_NAME, KEY_NAME, DEFAULT_SECURITY_GROUP_NAME, \
23+
WIREGUARD_SECURITY_GROUP_NAME, MASTER_IDENTIFIER, WORKER_IDENTIFIER, \
24+
VPNGTW_IDENTIFIER, UPLOAD_FILEPATHS
2325
from bibigrid.models import exceptions
2426
from bibigrid.models import return_threading
2527
from bibigrid.models.exceptions import ExecutionException, ConfigurationException
2628

27-
PREFIX = "bibigrid"
28-
SEPARATOR = "-"
29-
PREFIX_WITH_SEP = PREFIX + SEPARATOR
30-
FILEPATHS = [(a_rp.PLAYBOOK_PATH, a_rp.PLAYBOOK_PATH_REMOTE), (bin_path.BIN_PATH, bin_path.BIN_PATH_REMOTE)]
31-
32-
33-
def get_identifier(identifier, cluster_id, additional=""):
34-
"""
35-
This method does more advanced string formatting to generate master, vpngtw and worker names
36-
@param identifier: master|vpngtw|worker
37-
@param cluster_id: id of cluster
38-
@param additional: an additional string to be added at the end
39-
@return: the generated string
40-
"""
41-
general = PREFIX_WITH_SEP + identifier + SEPARATOR + cluster_id
42-
if additional or additional == 0:
43-
return general + SEPARATOR + str(additional)
44-
return general
45-
46-
47-
MASTER_IDENTIFIER = partial(get_identifier, identifier="master", additional="")
48-
WORKER_IDENTIFIER = partial(get_identifier, identifier="worker")
49-
VPN_WORKER_IDENTIFIER = partial(get_identifier, identifier="vpngtw")
50-
51-
KEY_PREFIX = "tempKey_bibi"
52-
CONFIG_FOLDER = os.path.expanduser("~/.config/bibigrid/")
53-
KEY_FOLDER = os.path.join(CONFIG_FOLDER, "keys/")
54-
AC_NAME = "ac" + SEPARATOR + "{cluster_id}"
55-
KEY_NAME = KEY_PREFIX + SEPARATOR + "{cluster_id}"
56-
CLUSTER_MEMORY_FOLDER = KEY_FOLDER
57-
CLUSTER_MEMORY_FILE = ".bibigrid.mem"
58-
CLUSTER_MEMORY_PATH = os.path.join(CONFIG_FOLDER, CLUSTER_MEMORY_FILE)
59-
DEFAULT_SECURITY_GROUP_NAME = "default" + SEPARATOR + "{cluster_id}"
60-
WIREGUARD_SECURITY_GROUP_NAME = "wireguard" + SEPARATOR + "{cluster_id}"
61-
6229

6330
class Create: # pylint: disable=too-many-instance-attributes,too-many-arguments
6431
"""
@@ -103,6 +70,22 @@ def __init__(self, *, providers, configurations, config_path, log, debug=False,
10370
"useMasterWithPublicIp", True)
10471
self.log.debug("Keyname: %s", self.key_name)
10572

73+
os.makedirs(os.path.join(CLUSTER_INFO_FOLDER), exist_ok=True)
74+
self.write_cluster_state({"floating_ip": None, "state": 202,
75+
"message": "Create process has been started."})
76+
77+
def write_cluster_state(self, state):
    """
    Persists the current cluster state as YAML in two places: the "last cluster" memory file
    (CLUSTER_MEMORY_PATH) and the per-cluster file CLUSTER_INFO_FOLDER/<cluster_id>.yaml.
    The given state is merged on top of the cluster_id and ssh_user of this instance, so keys
    passed in state take precedence.
    @param state: dict with additional state information (e.g. floating_ip, state, message)
    @raise ValueError: if the cluster_id would place the per-cluster file outside CLUSTER_INFO_FOLDER
    @return:
    """
    state = {"cluster_id": self.cluster_id, "ssh_user": self.ssh_user, **state}

    # Validate the target path before anything is written, so an invalid cluster_id
    # does not leave a partially updated state behind.
    info_folder = os.path.normpath(CLUSTER_INFO_FOLDER)
    cluster_info_path = os.path.normpath(os.path.join(info_folder, f"{self.cluster_id}.yaml"))
    # commonpath compares whole path components; a plain startswith() check would also accept
    # sibling folders that merely share the prefix (e.g. "<folder>_other/").
    if os.path.commonpath([info_folder, cluster_info_path]) != info_folder:
        raise ValueError("Invalid cluster_id resulting in path traversal")

    # last cluster
    with open(CLUSTER_MEMORY_PATH, mode="w+", encoding="UTF-8") as cluster_memory_file:
        yaml.safe_dump(data=state, stream=cluster_memory_file)
    # all clusters
    with open(cluster_info_path, mode="w+", encoding="UTF-8") as cluster_info_file:
        yaml.safe_dump(data=state, stream=cluster_info_file)
88+
10689
def create_defaults(self):
10790
self.log.debug("Creating default files")
10891
if not self.configurations[0].get("customAnsibleCfg", False) or not os.path.isfile(a_rp.ANSIBLE_CFG_PATH):
@@ -138,10 +121,6 @@ def generate_keypair(self):
138121
for provider in self.providers:
139122
provider.create_keypair(name=self.key_name, public_key=public_key)
140123

141-
# write cluster_id to automatically read it on following calls if no cid is given
142-
with open(CLUSTER_MEMORY_PATH, mode="w+", encoding="UTF-8") as cluster_memory_file:
143-
yaml.safe_dump(data={"cluster_id": self.cluster_id, "ssh_user": self.ssh_user}, stream=cluster_memory_file)
144-
145124
def delete_old_vars(self):
146125
"""
147126
Deletes host_vars and group_vars
@@ -238,14 +217,14 @@ def start_vpn_or_master(self, configuration, provider): # pylint: disable=too-m
238217
raise ConfigurationException(f"MAC address for ip {configuration['private_v4']} not found.")
239218

240219
# pylint: disable=comparison-with-callable
241-
if identifier == VPN_WORKER_IDENTIFIER or (identifier == MASTER_IDENTIFIER and self.use_master_with_public_ip):
220+
if identifier == VPNGTW_IDENTIFIER or (identifier == MASTER_IDENTIFIER and self.use_master_with_public_ip):
242221
configuration["floating_ip"] = \
243222
provider.attach_available_floating_ip(network=external_network, server=server)["floating_ip_address"]
244223
if identifier == MASTER_IDENTIFIER:
245-
with open(CLUSTER_MEMORY_PATH, mode="w+", encoding="UTF-8") as cluster_memory_file:
246-
yaml.safe_dump(
247-
data={"cluster_id": self.cluster_id, "floating_ip": configuration["floating_ip"]},
248-
stream=cluster_memory_file)
224+
self.write_cluster_state({"cluster_id": self.cluster_id, "floating_ip": configuration["floating_ip"],
225+
"state": 202,
226+
"message": "Create process has been started. Master has been created."
227+
})
249228
self.log.debug(f"Added floating ip {configuration['floating_ip']} to {name}.")
250229
elif identifier == MASTER_IDENTIFIER:
251230
configuration["floating_ip"] = server["private_v4"] # pylint: enable=comparison-with-callable
@@ -303,12 +282,13 @@ def create_server_volumes(self, provider, instance, name):
303282
@param name: server name
304283
@return:
305284
"""
306-
self.log.info("Creating volumes ...")
285+
self.log.info(f"Creating volumes for {name}...")
307286
return_volumes = []
308-
309287
group_instance = {"volumes": []}
310288
instance["group_instances"] = {name: group_instance}
289+
311290
for i, volume in enumerate(instance.get("volumes", [])):
291+
self.log.debug(f"Volume {i}: {volume}")
312292
if not volume.get("exists"):
313293
if volume.get("permanent"):
314294
infix = "perm"
@@ -332,10 +312,10 @@ def create_server_volumes(self, provider, instance, name):
332312
if not return_volume:
333313
raise ConfigurationException(f"Snapshot {volume['snapshot']} not found!")
334314
else:
335-
self.log.debug("Creating volume...")
336315
return_volume = provider.create_volume(name=volume_name, size=volume.get("size", 50),
337316
volume_type=volume.get("type"),
338317
description=f"Created for {name}")
318+
self.log.info(f"Volumes {i} created for {name}...")
339319
return_volumes.append(return_volume)
340320
return return_volumes
341321

@@ -382,7 +362,7 @@ def prepare_vpn_or_master_args(self, configuration):
382362
identifier = MASTER_IDENTIFIER
383363
elif configuration.get("vpnInstance"):
384364
instance_type = configuration["vpnInstance"]
385-
identifier = VPN_WORKER_IDENTIFIER
365+
identifier = VPNGTW_IDENTIFIER
386366
else:
387367
self.log.warning(
388368
f"Configuration {configuration['cloud_identifier']} "
@@ -464,7 +444,8 @@ def upload_data(self, private_key, clean_playbook=False):
464444
ssh_handler.execute_ssh(ssh_data=ssh_data, log=self.log)
465445
self.log.info("Uploading Data")
466446
ssh_data = {"floating_ip": self.master_ip, "private_key": private_key, "username": self.ssh_user,
467-
"commands": commands, "filepaths": FILEPATHS, "gateway": self.configurations[0].get("gateway", {}),
447+
"commands": commands, "filepaths": UPLOAD_FILEPATHS,
448+
"gateway": self.configurations[0].get("gateway", {}),
468449
"timeout": self.ssh_timeout}
469450
ssh_handler.execute_ssh(ssh_data=ssh_data, log=self.log)
470451

@@ -592,6 +573,9 @@ def create(self): # pylint: disable=too-many-branches,too-many-statements
592573
else:
593574
return 0 # will be called if no exception occurred
594575
terminate.terminate(cluster_id=self.cluster_id, providers=self.providers, log=self.log, debug=self.debug)
576+
self.write_cluster_state({"floating_ip": self.configurations[0]["floating_ip"],
577+
"state": 500,
578+
"message": "Cluster creation failed. Terminated remains."})
595579
return 1
596580

597581
def log_cluster_start_info(self):
@@ -616,3 +600,6 @@ def log_cluster_start_info(self):
616600
self.log.log(42, f"Detailed cluster info: ./bibigrid.sh -i '{self.config_path}' -l -cid {self.cluster_id}")
617601
if self.configurations[0].get("ide"):
618602
self.log.log(42, f"IDE Port Forwarding: ./bibigrid.sh -i '{self.config_path}' -ide -cid {self.cluster_id}")
603+
self.write_cluster_state({"floating_ip": self.configurations[0]["floating_ip"],
604+
"state": 201,
605+
"message": "Cluster successfully created."})

bibigrid/core/actions/list_clusters.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import pprint
77
import re
88

9-
from bibigrid.core.actions import create
9+
from bibigrid.core.utility.statics.create_statics import MASTER_IDENTIFIER
1010

1111
SERVER_REGEX = re.compile(r"^bibigrid-((master)-([a-zA-Z0-9]+)|(worker|vpngtw)-([a-zA-Z0-9]+)-\d+)$")
1212

@@ -148,7 +148,7 @@ def get_master_access_ip(cluster_id, master_provider, log):
148148
"""
149149
# TODO: maybe move the method from list_clusters as it is now independent of list_clusters
150150
log.info("Finding master ip for cluster %s...", cluster_id)
151-
master = create.MASTER_IDENTIFIER(cluster_id=cluster_id)
151+
master = MASTER_IDENTIFIER(cluster_id=cluster_id)
152152
server = master_provider.get_server(master)
153153
if server:
154154
return server.get("public_v4") or server.get("public_v6") or server.get("private_v4")

bibigrid/core/actions/terminate.py

Lines changed: 47 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,25 @@
77
import re
88
import time
99

10-
from bibigrid.core.actions import create
10+
import yaml
11+
12+
from bibigrid.core.utility.paths.basic_path import CLUSTER_INFO_FOLDER, CLUSTER_MEMORY_PATH, KEY_FOLDER
13+
from bibigrid.core.utility.statics.create_statics import DEFAULT_SECURITY_GROUP_NAME, WIREGUARD_SECURITY_GROUP_NAME, \
14+
KEY_NAME, AC_NAME
1115
from bibigrid.models.exceptions import ConflictException
1216

1317

18+
def write_cluster_state(cluster_id, state):
    """
    Persists the given cluster state as YAML in two places: the "last cluster" memory file
    (CLUSTER_MEMORY_PATH) and the per-cluster file CLUSTER_INFO_FOLDER/<cluster_id>.yaml.
    @param cluster_id: id of the cluster; used to build the per-cluster file name
    @param state: dict that is dumped as-is via yaml.safe_dump
    @raise ValueError: if the cluster_id would place the per-cluster file outside CLUSTER_INFO_FOLDER
    @return:
    """
    # Normalize the folder as well: comparing a normalized path against a folder constant that
    # is not normalized (e.g. one with a trailing separator) would reject every cluster_id.
    info_folder = os.path.normpath(CLUSTER_INFO_FOLDER)
    cluster_info_path = os.path.normpath(os.path.join(info_folder, f"{cluster_id}.yaml"))
    # commonpath compares whole path components; a plain startswith() check would also accept
    # sibling folders that merely share the prefix (e.g. "<folder>_other/").
    # The check runs before any write so an invalid cluster_id leaves no partial state behind.
    if os.path.commonpath([info_folder, cluster_info_path]) != info_folder:
        raise ValueError("Invalid cluster_id resulting in path traversal")

    # last cluster
    with open(CLUSTER_MEMORY_PATH, mode="w+", encoding="UTF-8") as cluster_memory_file:
        yaml.safe_dump(data=state, stream=cluster_memory_file)
    # all clusters
    with open(cluster_info_path, mode="w+", encoding="UTF-8") as cluster_info_file:
        yaml.safe_dump(data=state, stream=cluster_info_file)
28+
1429
def terminate(cluster_id, providers, log, debug=False, assume_yes=False):
1530
"""
1631
Goes through all providers and gets info of all servers which name contains cluster ID.
@@ -26,14 +41,14 @@ def terminate(cluster_id, providers, log, debug=False, assume_yes=False):
2641
if not input(f"DEBUG MODE: Any non-empty input to shutdown cluster {cluster_id}. "
2742
"Empty input to exit with cluster still alive:"):
2843
return 0
29-
security_groups = [create.DEFAULT_SECURITY_GROUP_NAME]
44+
security_groups = [DEFAULT_SECURITY_GROUP_NAME]
3045
if len(providers) > 1:
31-
security_groups.append(create.WIREGUARD_SECURITY_GROUP_NAME)
46+
security_groups.append(WIREGUARD_SECURITY_GROUP_NAME)
3247
cluster_server_state = []
3348
cluster_keypair_state = []
3449
cluster_security_group_state = []
3550
cluster_volume_state = []
36-
tmp_keyname = create.KEY_NAME.format(cluster_id=cluster_id)
51+
tmp_keyname = KEY_NAME.format(cluster_id=cluster_id)
3752
local_keypairs_deleted = delete_local_keypairs(tmp_keyname, log)
3853
if assume_yes or local_keypairs_deleted or input(
3954
f"WARNING: No local temporary keyfiles found for cluster {cluster_id}. "
@@ -117,7 +132,7 @@ def delete_local_keypairs(tmp_keyname, log):
117132
"""
118133
success = False
119134
log.info("Deleting Keypair locally...")
120-
tmp_keypath = os.path.join(create.KEY_FOLDER, tmp_keyname)
135+
tmp_keypath = os.path.join(KEY_FOLDER, tmp_keyname)
121136
pub_tmp_keypath = tmp_keypath + ".pub"
122137
if os.path.isfile(tmp_keypath):
123138
os.remove(tmp_keypath)
@@ -148,14 +163,14 @@ def delete_security_groups(provider, cluster_id, security_groups, log, timeout=5
148163
for security_group_format in security_groups:
149164
security_group_name = security_group_format.format(cluster_id=cluster_id)
150165
attempts = 0
151-
tmp_success = False
166+
tmp_success = not provider.get_security_group(security_group_name)
152167
while not tmp_success:
153168
try:
154-
not_found = not provider.get_security_group(security_group_name)
155169
tmp_success = provider.delete_security_group(security_group_name)
156170
except ConflictException:
171+
log.info(f"ConflictException on deletion attempt on {provider.cloud_specification['identifier']}.")
157172
tmp_success = False
158-
if tmp_success or not_found:
173+
if tmp_success:
159174
break
160175
if attempts < timeout:
161176
attempts += 1
@@ -166,7 +181,7 @@ def delete_security_groups(provider, cluster_id, security_groups, log, timeout=5
166181
log.error(f"Attempt to delete security group {security_group_name} on "
167182
f"{provider.cloud_specification['identifier']} failed.")
168183
break
169-
log.info(f"Delete security_group {security_group_name} -> {tmp_success or not_found} on "
184+
log.info(f"Delete security_group {security_group_name} -> {tmp_success} on "
170185
f"{provider.cloud_specification['identifier']}.")
171186
success = success and tmp_success
172187
return success
@@ -183,7 +198,7 @@ def delete_application_credentials(master_provider, cluster_id, log):
183198
# implement deletion
184199
auth = master_provider.cloud_specification["auth"]
185200
if not auth.get("application_credential_id") or not auth.get("application_credential_secret"):
186-
return master_provider.delete_application_credential_by_id_or_name(create.AC_NAME.format(cluster_id=cluster_id))
201+
return master_provider.delete_application_credential_by_id_or_name(AC_NAME.format(cluster_id=cluster_id))
187202
log.info("Because you used application credentials to authenticate, "
188203
"no created application credentials need deletion.")
189204
return True
@@ -197,7 +212,7 @@ def delete_non_permanent_volumes(provider, cluster_id, log):
197212
@param log:
198213
@return: a list of the servers' (that were to be terminated) termination states
199214
"""
200-
log.info("Deleting tmp volumes on provider %s...", provider.cloud_specification['identifier'])
215+
log.info("Deleting non permanent volumes on provider %s...", provider.cloud_specification['identifier'])
201216
volume_list = provider.list_volumes()
202217
cluster_volume_state = []
203218
volume_regex = re.compile(
@@ -228,7 +243,9 @@ def terminate_output(*, cluster_server_state, cluster_keypair_state, cluster_sec
228243
cluster_server_terminated = all(cluster_server_state)
229244
cluster_keypair_deleted = all(cluster_keypair_state)
230245
cluster_security_group_deleted = all(cluster_security_group_state)
231-
cluster_volume_deleted = all(cluster_volume_state)
246+
cluster_volume_deleted = all(all(instance_volume_states) for instance_volume_states in cluster_volume_state)
247+
message = "Cluster terminated."
248+
state = 200
232249
if cluster_existed:
233250
if cluster_server_terminated:
234251
log.info("Terminated all servers of cluster %s.", cluster_id)
@@ -248,18 +265,27 @@ def terminate_output(*, cluster_server_state, cluster_keypair_state, cluster_sec
248265
log.warning("Unable to delete all volumes of cluster %s.", cluster_id)
249266
if (cluster_server_terminated and cluster_keypair_deleted and cluster_security_group_deleted and
250267
cluster_volume_deleted):
251-
log.log(42, f"Successfully terminated cluster {cluster_id}.")
268+
message = f"Successfully terminated cluster {cluster_id}."
269+
log.log(42, message)
252270
else:
253-
log.warning("Unable to terminate cluster %s properly."
254-
"\nAll servers terminated: %s"
255-
"\nAll keys deleted: %s"
256-
"\nAll security groups deleted: %s"
257-
"\nAll security groups deleted: %s", cluster_id, cluster_server_terminated,
258-
cluster_keypair_deleted, cluster_security_group_deleted, cluster_volume_deleted)
271+
message = (f"Unable to terminate cluster {cluster_id} properly."
272+
f"\nAll servers terminated: {cluster_server_terminated}"
273+
f"\nAll keys deleted: {cluster_keypair_deleted}"
274+
f"\nAll security groups deleted: {cluster_security_group_deleted}"
275+
f"\nAll volumes deleted: {cluster_volume_deleted}")
276+
log.warning(message)
259277
if ac_state:
260278
log.info("Successfully handled application credential of cluster %s.", cluster_id)
261279
else:
262280
log.warning("Unable to delete application credential of cluster %s", cluster_id)
263281
else:
264-
log.warning("Unable to find any servers for cluster-id %s. "
265-
"Check cluster-id and configuration.\nAll keys deleted: %s", cluster_id, cluster_keypair_deleted)
282+
message = "Cluster does not exist."
283+
state = 404
284+
log.warning(f"Unable to find any servers for cluster-id {cluster_id}. "
285+
f"Check cluster-id and configuration.\nAll keys deleted: {cluster_keypair_deleted}")
286+
287+
write_cluster_state(cluster_id, {"cluster_id": cluster_id,
288+
"floating_ip": None,
289+
"ssh_user": None,
290+
"state": state,
291+
"message": message})

bibigrid/core/actions/update.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
Module that contains methods to update the master playbook
33
"""
44

5-
from bibigrid.core.actions import create
65
from bibigrid.core.actions.list_clusters import dict_clusters
76
from bibigrid.core.utility.handler import cluster_ssh_handler
87

@@ -19,7 +18,7 @@ def update(creator, log):
1918
log.warning(f"There are still workers up! {workers}")
2019
return 1
2120
if master_ip and ssh_user and used_private_key:
22-
master = create.MASTER_IDENTIFIER(cluster_id=creator.cluster_id)
21+
master = creator.MASTER_IDENTIFIER(cluster_id=creator.cluster_id)
2322
server = creator.providers[0].get_server(master)
2423
creator.master_ip = master_ip
2524
creator.configurations[0]["private_v4"] = server["private_v4"]

0 commit comments

Comments
 (0)