Skip to content

Commit 24be237

Browse files
committed
update
1 parent 93621b8 commit 24be237

File tree

3 files changed

+44
-63
lines changed

3 files changed

+44
-63
lines changed

accounts/templates/accounts/dashboard.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
{% block content %}
44
{% if user.is_authenticated %}
55
<section class="main-content columns is-fullwidth is-fullheight">
6-
<aside class="column is-3 is-narrow-mobile is-fullheight section is-hidden-mobile">
6+
<aside class="column is-2 is-narrow-mobile is-fullheight section is-hidden-mobile">
77
<p class="menu-label is-hidden-touch">Navigation</p>
88
<ul class="menu-list">
99
<li>
@@ -44,7 +44,7 @@
4444
</ul>
4545
</aside>
4646

47-
<div class="section column is-9">
47+
<div class="section column is-10">
4848
{% block dashboard %}
4949
{% endblock %}
5050
</div>

accounts/templates/accounts/run_simulation.html

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,15 +47,15 @@ <h2 class="text-center mb-5" style="text-decoration: underline;">
4747
<label class="font-weight-bold">Checkpointing</label>
4848
<br>
4949
<label class="switch">
50-
<input type="checkbox" name="checkpoint_flag">
50+
<input type="checkbox" name="checkpoint_flag" id="checkpointFlagCheckbox">
5151
<span class="slider round"></span>
5252
</label>
5353
<small class="form-text text-muted">Allows checkpointing</small>
5454
<br>
5555
<label class="font-weight-bold">Auto restart for Checkpointing</label>
5656
<br>
5757
<label class="switch">
58-
<input type="checkbox" name="auto_restart">
58+
<input type="checkbox" name="auto_restart" id="autoRestartCheckbox">
5959
<span class="slider round"></span>
6060
</label>
6161
<small class="form-text text-muted">Allows auto restarting from the checkpoint (if you activate this flag, the previous flag will also be considered active)</small>
@@ -76,7 +76,15 @@ <h2 class="text-center mb-5" style="text-decoration: underline;">
7676
<label class="form-label" for="customFile">Input file</label>
7777
{% render_field form.document type="file" class="form-control" id="customFile" placeholder="Upload your workflow definition" %}
7878
<small class="form-text text-muted">Upload your workflow's definition</small>
79+
<script>
80+
const autoRestartCheckbox = document.getElementById('autoRestartCheckbox');
81+
const checkpointFlagCheckbox = document.getElementById('checkpointFlagCheckbox');
7982

83+
autoRestartCheckbox.addEventListener('change', function () {
84+
checkpointFlagCheckbox.checked = this.checked;
85+
checkpointFlagCheckbox.disabled = this.checked;
86+
});
87+
</script>
8088
</div>
8189
<br>
8290
<button type="submit" class="btn btn-primary fa fa-play-circle" onclick="this.disabled=true,this.form.submit();"></button>

accounts/views.py

Lines changed: 32 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -582,8 +582,6 @@ def run(self):
582582
cmd2 = "source /etc/profile; source " + path_install_dir + "/scripts/load.sh " + path_install_dir + " " + param_machine + "; " + get_variables_exported(
583583
exported_variables) + " mkdir -p " + execution_folder + "; cd " + path_install_dir + "/scripts/" + machine_folder + "/; source app.sh " + userMachine + " " + str(
584584
self.name) + " " + workflow_folder + " " + execution_folder + " " + self.numNodes + " " + self.execTime + " " + self.qos + " " + machine_found.installDir + " " + self.branch + " " + machine_found.dataDir + " " + self.gOPTION + " " + self.tOPTION + " " + self.dOPTION
585-
log.info("COMMAND")
586-
log.info(cmd2)
587585
stdin, stdout, stderr = ssh.exec_command(cmd2)
588586
stdout = stdout.readlines()
589587
stderr = stderr.readlines()
@@ -599,8 +597,6 @@ def run(self):
599597
self.request.session['jobID'] = jobID
600598
self.request.session['execution_folder'] = execution_folder
601599
os.remove("documents/" + str(self.name))
602-
if self.auto_restart_bool:
603-
monitor_checkpoint(var, self.request, self.execTime, machine_found.id)
604600
return
605601

606602

@@ -634,7 +630,7 @@ def run_sim(request):
634630
if form.is_valid():
635631
branch = request.POST.get('branchChoice')
636632
bash_script_path = "/var/www/API_REST/documents/delete_old_files.sh"
637-
# execute_bash_script(bash_script_path)
633+
execute_bash_script(bash_script_path)
638634
for filename, file in request.FILES.items():
639635
uniqueID = uuid.uuid4()
640636
nameE = (str(file).split(".")[0]) + "_" + str(uniqueID) + "." + str(file).split(".")[1]
@@ -993,13 +989,16 @@ def executions(request):
993989

994990
except:
995991
return False
992+
996993
request.session["content"] = content
997994
request.session['machine_chosen'] = machine_found.id
998995
c = Connection()
999996
c.user = request.user
1000997
c.status = "Active"
1001998
c.save()
1002999
request.session["idConn"] = c.idConn_id
1000+
threadUpdate = updateExecutions(request, c.idConn_id)
1001+
threadUpdate.start()
10031002
checkConnBool = checkConnection(request)
10041003
if not checkConnBool:
10051004
machines_done = populate_executions_machines(request)
@@ -1008,8 +1007,6 @@ def executions(request):
10081007
request.session["checkConn"] = "Required"
10091008
return render(request, 'accounts/executions.html',
10101009
{'machines': machines_done, 'checkConn': "no"})
1011-
threadUpdate = updateExecutions(request)
1012-
threadUpdate.start()
10131010
machine_connected = Machine.objects.get(id=request.session["machine_chosen"])
10141011
executions = Execution.objects.all().filter(author=request.user, machine=machine_connected).filter(
10151012
Q(status="PENDING") | Q(status="RUNNING") | Q(status="INITIALIZING"))
@@ -1105,23 +1102,28 @@ def populate_executions_machines(request):
11051102

11061103

11071104
class updateExecutions(threading.Thread):
1108-
def __init__(self, request):
1105+
def __init__(self, request, connectionID):
11091106
threading.Thread.__init__(self)
11101107
self.request = request
11111108
self.timeout = 120 * 60
1109+
self.connectionID=connectionID
11121110

11131111
def run(self):
11141112
timeout_start = time.time()
11151113
while time.time() < timeout_start + self.timeout:
1114+
conn=Connection.objects.get(idConn_id=self.connectionID)
1115+
if conn.status=="Disconnect":
1116+
break
11161117
boolException = update_table(self.request)
11171118
if not boolException:
11181119
break
11191120
time.sleep(10)
1120-
Connection.objects.filter(idConn_id=self.request.session["idConn"]).update(status="Disconnect")
1121+
Connection.objects.filter(idConn_id=self.connectionID).update(status="Disconnect")
11211122
render_right(self.request)
11221123
return
11231124

11241125

1126+
11251127
def update_table(request):
11261128
machine_found = Machine.objects.get(id=request.session['machine_chosen'])
11271129
machineID = machine_found.id
@@ -1143,6 +1145,10 @@ def update_table(request):
11431145
if not (str(values[4]) == "FAILED" and executionE.status == "INITIALIZING"):
11441146
Execution.objects.filter(jobID=executionE.jobID).update(status=values[4], time=values[3],
11451147
nodes=int(values[2]))
1148+
executionTimeout = Execution.objects.all().filter(author=request.user, autorestart=True, status="TIMEOUT")
1149+
for executionT in executionTimeout:
1150+
executionT.status="CONTINUE"
1151+
checkpointing(executionT.jobID, request, executionT.machine_id)
11461152
return True
11471153

11481154

@@ -1185,110 +1191,77 @@ def stopExecution(eIDstop, request):
11851191
{'form': form, 'executions': executions, 'executionsDone': executionsDone,
11861192
'executionsFailed': executionsFailed, 'executionsTimeout': executionTimeout})
11871193

1188-
1189-
class auto_restart_thread(threading.Thread):
1190-
def __init__(self, jobID, request, time, machine_id):
1191-
threading.Thread.__init__(self)
1192-
self.jobID = jobID
1193-
self.request = request
1194-
self.time = time
1195-
self.machine_id = machine_id
1196-
1197-
def run(self):
1198-
time.sleep(int(self.time) * 60)
1199-
wait_timeout_new(self.jobID, self.request, self.machine_id)
1200-
return
1201-
1202-
1203-
def wait_timeout_new(jobID, request, machine_id):
1204-
execution = Execution.objects.get(jobID=jobID)
1205-
if execution.status != "TIMEOUT":
1206-
time.sleep(15)
1207-
wait_timeout_new(jobID, request)
1208-
else:
1209-
checkpointing(jobID, request, machine_id)
1210-
return
1211-
1212-
1213-
def monitor_checkpoint(jobID, request, execTime, machine):
1214-
auto_restart_obj = auto_restart_thread(jobID, request, execTime, machine)
1215-
auto_restart_obj.start()
1216-
return
1217-
1218-
12191194
def checkpointing(jobIDCheckpoint, request, machine_id):
12201195
ssh = connection(request.session['content'], machine_id)
12211196
checkpointID = Execution.objects.all().get(author=request.user, jobID=jobIDCheckpoint)
1222-
machine_connected = Machine.objects.get(id=machine_id)
1223-
machine_folder = extract_substring(machine_connected.fqdn)
1224-
command = "source /etc/profile; cd " + machine_connected.installDir + "/scripts/" + machine_folder + "/; sh app-checkpoint.sh " + checkpointID.user + " " + checkpointID.name_workflow + " " + checkpointID.workflow_path + " " + checkpointID.wdir + " " + str(
1225-
checkpointID.nodes) + " " + str(
1226-
checkpointID.execution_time) + " " + checkpointID.qos + " " + machine_connected.installDir
1197+
command = "source /etc/profile; cd " + checkpointID.wdir + "; source checkpoint_script.sh;"
12271198
stdin, stdout, stderr = ssh.exec_command(command)
12281199
stdout = stdout.readlines()
12291200
s = "Submitted batch job"
1201+
execTime=checkpointID.execution_time
12301202
while (len(stdout) == 0):
12311203
import time
12321204
time.sleep(1)
12331205
if (len(stdout) > 1):
12341206
for line in stdout:
12351207
if (s in line):
12361208
jobID = int(line.replace(s, ""))
1237-
request.session['jobID'] = jobID
12381209
form = Execution()
1239-
form.jobID = request.session['jobID']
1210+
form.jobID = jobID
1211+
form.eID = uuid.uuid4()
1212+
form.machine_id = checkpointID.machine_id
12401213
form.user = checkpointID.user
12411214
form.author = request.user
12421215
form.nodes = checkpointID.nodes
12431216
form.status = "PENDING"
1244-
form.checkpoint = jobIDCheckpoint
1217+
form.checkpoint = checkpointID.jobID
12451218
form.time = "00:00:00"
12461219
form.wdir = checkpointID.wdir
12471220
form.workflow_path = checkpointID.workflow_path
12481221
form.execution_time = int(checkpointID.execution_time)
1249-
time = int(checkpointID.execution_time)
1222+
execTime = int(checkpointID.execution_time)
12501223
form.name_workflow = checkpointID.name_workflow
12511224
form.qos = checkpointID.qos
12521225
form.name_sim = checkpointID.name_sim
12531226
form.autorestart = checkpointID.autorestart
1227+
form.checkpointBool = checkpointID.checkpointBool
1228+
form.d_bool = checkpointID.d_bool
1229+
form.t_bool = checkpointID.t_bool
1230+
form.g_bool = checkpointID.g_bool
1231+
form.branch = checkpointID.branch
12541232
form.save()
12551233
checkpointID = Execution.objects.all().get(author=request.user, jobID=jobIDCheckpoint)
12561234
checkpointID.status = "CONTINUE"
12571235
checkpointID.save()
1258-
monitor_checkpoint(request.session['jobID'], request, time, machine_id)
1236+
#monitor_checkpoint(request.session['jobID'], request, execTime, machine_id)
12591237
return
12601238

12611239

12621240
def checkpointing_noAutorestart(jobIDCheckpoint, request):
12631241
ssh = connection(request.session['content'], request.session['machine_chosen'])
12641242
checkpointID = Execution.objects.all().get(author=request.user, jobID=jobIDCheckpoint)
12651243
machine_connected = Machine.objects.get(id=request.session['machine_chosen'])
1266-
machine_folder = extract_substring(machine_connected.fqdn)
1267-
path_install_dir = os.path.join(machine_connected.installDir, checkpointID.branch)
1268-
param_machine = remove_numbers(machine_connected.fqdn)
12691244
command = "source /etc/profile; cd " + checkpointID.wdir + "; source checkpoint_script.sh;"
1270-
log.info("CHECKPOINT START")
1271-
log.info(command)
12721245
stdin, stdout, stderr = ssh.exec_command(command)
12731246
stdout = stdout.readlines()
12741247
s = "Submitted batch job"
1275-
log.info("READ CHECKPOINT")
12761248
while (len(stdout) == 0):
12771249
import time
12781250
time.sleep(1)
12791251
if (len(stdout) > 1):
1280-
log.info(stdout)
12811252
for line in stdout:
12821253
if (s in line):
12831254
jobID = int(line.replace(s, ""))
12841255
request.session['jobID'] = jobID
12851256
form = Execution()
1286-
form.jobID = request.session['jobID']
1257+
form.jobID = jobID
1258+
form.eID= uuid.uuid4()
1259+
form.machine_id= checkpointID.machine_id
12871260
form.user = checkpointID.user
12881261
form.author = request.user
12891262
form.nodes = checkpointID.nodes
12901263
form.status = "PENDING"
1291-
form.checkpoint = jobIDCheckpoint
1264+
form.checkpoint = checkpointID.jobID
12921265
form.time = "00:00:00"
12931266
form.wdir = checkpointID.wdir
12941267
form.workflow_path = checkpointID.workflow_path

0 commit comments

Comments
 (0)