Skip to content

Commit 194ba96

Browse files
authored
Merge pull request #337 from azimuth-cloud/feature/retry-create-by-default
Default to retrying platform create once
2 parents 57cb2c3 + 009d956 commit 194ba96

File tree

4 files changed, with 23 additions (+23) and 10 deletions (-10).

4 files changed, with 23 additions (+23) and 10 deletions (-10).

azimuth_caas_operator/models/v1alpha1/cluster_type.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,8 @@ class ClusterTypeSpec(schema.BaseModel):
2929
# The timeout (in seconds) to apply to the kubernetes job resource
3030
# which creates, updates and deletes the cluster instances
3131
jobTimeout: int = pydantic.Field(default=1200)
32+
# The number of retries for the kubernetes job resource
33+
jobCreateRetries: int = 1
3234
# Option to add cloud specific details, like the image
3335
extraVars: schema.Dict[str, schema.Any] = pydantic.Field(default_factory=dict)
3436
# Option to define cluster-type specific details, like inventory

azimuth_caas_operator/tests/models/test_crds.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -61,6 +61,9 @@ def test_cluster_type_crd_json(self):
6161
"jobTimeout": {
6262
"type": "integer"
6363
},
64+
"jobCreateRetries": {
65+
"type": "integer"
66+
},
6467
"extraVars": {
6568
"additionalProperties": {
6669
"x-kubernetes-preserve-unknown-fields": true
@@ -301,6 +304,9 @@ def test_cluster_crd_json(self):
301304
"jobTimeout": {
302305
"type": "integer"
303306
},
307+
"jobCreateRetries": {
308+
"type": "integer"
309+
},
304310
"extraVars": {
305311
"additionalProperties": {
306312
"x-kubernetes-preserve-unknown-fields": true

azimuth_caas_operator/tests/utils/test_ansible_runner.py

Lines changed: 11 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -174,27 +174,31 @@ def test_get_job_remove(self):
174174
},
175175
clear=True,
176176
)
177-
def test_get_job_remove_with_trust_bundle(self):
177+
def test_get_job_create_with_trust_bundle(self):
178178
cluster = cluster_crd.get_fake()
179179
cluster.spec.leaseName = None
180180
cluster_type = cluster_type_crd.get_fake()
181+
cluster_type.spec.jobCreateRetries = 3
181182

182183
job = ansible_runner.get_job(
183-
cluster, cluster_type.spec, "test1-tfstate", "trust-bundle", remove=True
184+
cluster,
185+
cluster_type.spec,
186+
"test1-tfstate",
187+
"trust-bundle",
184188
)
185189

186190
expected = """\
187191
apiVersion: batch/v1
188192
kind: Job
189193
metadata:
190-
generateName: test1-remove-
194+
generateName: test1-create-
191195
labels:
192-
azimuth-caas-action: remove
196+
azimuth-caas-action: create
193197
azimuth-caas-cluster: test1
194198
namespace: ns1
195199
spec:
196200
activeDeadlineSeconds: 1200
197-
backoffLimit: 1
201+
backoffLimit: 3
198202
template:
199203
spec:
200204
containers:
@@ -206,8 +210,7 @@ def test_get_job_remove_with_trust_bundle(self):
206210
\\nif [ -f /runner/project/requirements.yml ]; then\\n ansible-galaxy install\\
207211
\\ -r /runner/project/requirements.yml\\nelif [ -f /runner/project/roles/requirements.yml\\
208212
\\ ]; then\\n ansible-galaxy install -r /runner/project/roles/requirements.yml\\n\\
209-
fi\\nansible-runner run /runner -j\\nopenstack application credential delete\\
210-
\\ az-caas-test1 || true\\n"
213+
fi\\nansible-runner run /runner -j\\n"
211214
env:
212215
- name: RUNNER_PLAYBOOK
213216
value: sample.yaml
@@ -322,7 +325,7 @@ def test_get_job_remove_with_trust_bundle(self):
322325
- emptyDir: {}
323326
name: ansible-home
324327
- configMap:
325-
name: test1-remove
328+
name: test1-create
326329
name: env
327330
- name: cloudcreds
328331
secret:
@@ -339,7 +342,6 @@ def test_get_job_remove_with_trust_bundle(self):
339342
- configMap:
340343
name: trust-bundle
341344
name: trust-bundle
342-
ttlSecondsAfterFinished: 36000
343345
""" # noqa
344346
self.assertEqual(expected, yaml.safe_dump(job))
345347

azimuth_caas_operator/utils/ansible_runner.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -259,6 +259,9 @@ def get_job(
259259

260260
remove_app_cred = remove and (cluster.spec.leaseName is None)
261261

262+
# default to 1 retry for create, if not specified in the type
263+
back_off_limit = 1 if remove else cluster_type_spec.jobCreateRetries
264+
262265
# TODO(johngarbutt): need get secret keyname from somewhere
263266
job_yaml = f"""apiVersion: batch/v1
264267
kind: Job
@@ -477,7 +480,7 @@ def get_job(
477480
if trust_bundle_configmap_name
478481
else ""
479482
}
480-
backoffLimit: {1 if remove else 0}
483+
backoffLimit: {back_off_limit}
481484
# Set timeout so that jobs don't get stuck in configuring state if something goes wrong
482485
activeDeadlineSeconds: {cluster_type_spec.jobTimeout}""" # noqa
483486
return yaml.safe_load(job_yaml)

0 commit comments

Comments
 (0)