Skip to content

Commit 0124123

Browse files
author
ksatzke
committed
Added configuration of the sandbox_gpu container image for workflow pods to run GPU-related tests
1 parent 9fba583 commit 0124123

File tree

4 files changed

+15
-15
lines changed

4 files changed

+15
-15
lines changed

ManagementService/python/deployWorkflow.py

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -132,8 +132,8 @@ def compile_resource_info_map(resource_names, uploaded_resources, email, sapi, d
132132
resource_metadata = json.loads(resource_metadata)
133133
if "runtime" in resource_metadata:
134134
resource_info["runtime"] = resource_metadata["runtime"]
135-
if "on_gpu" in resource_metadata:
136-
resource_info["on_gpu"] = True
135+
#if "on_gpu" in resource_metadata:
136+
# resource_info["on_gpu"] = True
137137

138138
num_chunks_str = dlc.get("grain_source_zip_num_chunks_" + resource_id)
139139
try:
@@ -302,17 +302,11 @@ def create_k8s_deployment(email, workflow_info, runtime, management=False):
302302
kservice['spec']['template']['spec']['containers'][0]['volumeMounts'] = [{'name': 'new-workflow-conf', 'mountPath': '/opt/mfn/SandboxAgent/conf'}]
303303
kservice['spec']['template']['spec']['serviceAccountName'] = new_workflow_conf['mgmtserviceaccount']
304304

305-
# management container should not consume a CPU
306-
#kservice['spec']['template']['spec']['containers'][0]['image'] = new_workflow_conf['image.Python']
307-
#if ("nvidia.com/gpu" in kservice['spec']['template']['spec']['containers'][0]['resources']['limits'].keys()):
308-
# overwrite limits entry, generate new k/v pair
309-
#print("RESOURCES: " + str(kservice['spec']['template']['spec']['containers'][0]['resources'])) # just testin...
310-
#print("RESOURCES: " + str(kservice['spec']['template']['spec']['containers'][0]['resources']['limits'])) # just testin...
305+
# management container should not consume a CPU and use standard sandbox image
311306
if (labels['workflowid'] == "Management"):
312307
kservice['spec']['template']['spec']['containers'][0]['resources']['limits']['nvidia.com/gpu'] = "0"
313308
kservice['spec']['template']['spec']['containers'][0]['resources']['requests']['nvidia.com/gpu'] = "0"
314-
#kservice['spec']['template']['spec']['containers'][0]['resources']['limits'] = {{"cpu": 1, "memory": "2Gi"}, "requests": {"cpu": 1, "memory": "1Gi"}}
315-
#kservice['spec']['template']['spec']['containers'][0]['resources']['limits']['nvidia.com/gpu'] = 0
309+
kservice['spec']['template']['spec']['containers'][0]['image'] = "localhost:5000/microfn/sandbox"
316310

317311
if 'HTTP_GATEWAYPORT' in new_workflow_conf:
318312
env.append({'name': 'HTTP_GATEWAYPORT', 'value': new_workflow_conf['HTTP_GATEWAYPORT']})
@@ -485,9 +479,9 @@ def handle(value, sapi):
485479
# _XXX_: due to the queue service still being in java in the sandbox
486480

487481
sandbox_image_name = "microfn/sandbox" # default value
488-
if "on_gpu" in resource_info_map.keys(): # sandbox_gpu image should be used for ths workflow
489-
if resource_info_map["on_gpu"] == True:
490-
sandbox_image_name = "microfn/sandbox_gpu"
482+
#if "on_gpu" in resource_info_map.keys(): # sandbox_gpu image should be used for ths workflow
483+
# if resource_info_map["on_gpu"] == True:
484+
# sandbox_image_name = "microfn/sandbox_gpu"
491485

492486
if any(resource_info_map[res_name]["runtime"] == "Java" for res_name in resource_info_map):
493487
sandbox_image_name = "microfn/sandbox_java"

deploy/helm/microfunctions/values.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,8 @@ manager:
4545
cpu: 100m
4646
memory: 1Gi
4747
sandbox:
48-
imagePathPython: "/microfn/sandbox"
49-
imagePathPythonGPU: "/microfn/sandbox_gpu"
48+
#imagePathPython: "/microfn/sandbox"
49+
imagePathPython: "/microfn/sandbox_gpu"
5050
imagePathJava: "/microfn/sandbox_java"
5151
imageTag: "latest"
5252
imagePullPolicy: "Always"

mfn_sdk/mfn_sdk/workflow.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,8 @@ def execute(self,data,timeout=60, check_duration=False):
279279

280280
# we are already deployed and have the endpoints stored in self._endpoints
281281
url = random.choice(self._endpoints)
282+
print(url)
283+
url=url+":30336"
282284
try:
283285
#postdata = {}
284286
#postdata["value"] = json.dumps(data)

tests/mfn_test_utils.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,10 @@ def _get_resource_info_map(self, workflow_description=None, resource_info_map=No
219219
print("ERROR: invalid workflow description.")
220220
assert False
221221

222+
#resource_info_map[resource_name]['on_gpu'] = True
223+
224+
#print("resource_info_map: " + str(resource_info_map))
225+
222226
return resource_info_map
223227

224228
def _delete_resource_if_existing(self, existing_resources, resource_name):

0 commit comments

Comments
 (0)