Skip to content

Commit fee9778

Browse files
authored
Fix ansible-operator finalizer concurrency issue (#5678)
* Fix ansible-operator finalizer concurrency issue For ansible-based operators, this change fixes an issue that caused finalizers to fail to run if the watched resource (CR) is deleted during reconciliation. Fixes #4909 Signed-off-by: Austin Macdonald <[email protected]> * dont change constants Signed-off-by: Austin Macdonald <[email protected]> * change name so we dont have a half-made test Signed-off-by: Austin Macdonald <[email protected]> * PR cleanup Signed-off-by: Austin Macdonald <[email protected]> * rm extra --- Signed-off-by: Austin Macdonald <[email protected]>
1 parent 61132e1 commit fee9778

File tree

9 files changed

+164
-9
lines changed

9 files changed

+164
-9
lines changed

Makefile

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -156,10 +156,16 @@ e2e_targets := test-e2e $(e2e_tests)
156156
export KIND_CLUSTER := osdk-test
157157

158158
KUBEBUILDER_ASSETS = $(PWD)/$(shell go install sigs.k8s.io/controller-runtime/tools/setup-envtest@latest && $(shell go env GOPATH)/bin/setup-envtest use $(ENVTEST_K8S_VERSION) --bin-dir tools/bin/ -p path)
159-
test-e2e-setup: build
159+
test-e2e-setup:: build dev-install cluster-create
160+
161+
.PHONY: cluster-create
162+
cluster-create::
163+
[[ "`$(TOOLS_DIR)/kind get clusters`" =~ "$(KIND_CLUSTER)" ]] || $(TOOLS_DIR)/kind create cluster --image="kindest/node:v$(ENVTEST_K8S_VERSION)" --name $(KIND_CLUSTER)
164+
165+
.PHONY: dev-install
166+
dev-install::
160167
$(SCRIPTS_DIR)/fetch kind 0.11.0
161168
$(SCRIPTS_DIR)/fetch kubectl $(ENVTEST_K8S_VERSION) # Install kubectl AFTER envtest because envtest includes its own kubectl binary
162-
[[ "`$(TOOLS_DIR)/kind get clusters`" =~ "$(KIND_CLUSTER)" ]] || $(TOOLS_DIR)/kind create cluster --image="kindest/node:v$(ENVTEST_K8S_VERSION)" --name $(KIND_CLUSTER)
163169

164170
.PHONY: test-e2e-teardown
165171
test-e2e-teardown:
@@ -177,7 +183,7 @@ test-e2e-go:: image/custom-scorecard-tests ## Run Go e2e tests
177183
test-e2e-ansible:: image/ansible-operator ## Run Ansible e2e tests
178184
go test -count=1 ./internal/ansible/proxy/...
179185
go test ./test/e2e/ansible -v -ginkgo.v
180-
test-e2e-ansible-molecule:: image/ansible-operator ## Run molecule-based Ansible e2e tests
186+
test-e2e-ansible-molecule:: install dev-install image/ansible-operator ## Run molecule-based Ansible e2e tests
181187
go run ./hack/generate/samples/molecule/generate.go
182188
./hack/tests/e2e-ansible-molecule.sh
183189
test-e2e-helm:: image/helm-operator ## Run Helm e2e tests
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# entries is a list of entries to include in
2+
# release notes and/or the migration guide
3+
entries:
4+
- description: >
5+
For ansible-based operators, this change fixes an issue that caused
6+
finalizers to fail to run if the watched resource (CR) is deleted during
7+
reconciliation.
8+
kind: "bugfix"
9+
10+
# Is this a breaking change?
11+
breaking: false

hack/generate/samples/internal/ansible/advanced_molecule.go

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,12 @@ func (ma *AdvancedMolecule) addMocksFromTestdata() {
235235
cmd = exec.Command("cp", "-r", "../../../hack/generate/samples/internal/ansible/testdata/inventory/", filepath.Join(ma.ctx.Dir, "inventory/"))
236236
_, err = ma.ctx.Run(cmd)
237237
pkg.CheckError("adding inventory/", err)
238+
239+
log.Infof("adding finalizer for finalizerconcurrencytest")
240+
cmd = exec.Command("cp", "../../../hack/generate/samples/internal/ansible/testdata/playbooks/finalizerconcurrencyfinalizer.yml", filepath.Join(ma.ctx.Dir, "playbooks/finalizerconcurrencyfinalizer.yml"))
241+
_, err = ma.ctx.Run(cmd)
242+
pkg.CheckError("adding finalizer for finalizerconccurencytest", err)
243+
238244
}
239245

240246
func (ma *AdvancedMolecule) updateDockerfile() {
@@ -456,8 +462,6 @@ func (ma *AdvancedMolecule) updatePlaybooks() {
456462
command: '{{ exec_command }}'
457463
register: exec_result
458464
459-
- debug: var=exec_result
460-
461465
- name: Get logs from busybox pod
462466
k8s_log:
463467
name: '{{ meta.name }}-busybox'
@@ -516,6 +520,36 @@ func (ma *AdvancedMolecule) updatePlaybooks() {
516520
clusterAnnotationTest)
517521
pkg.CheckError("adding playbook for clusterannotationtest", err)
518522

523+
log.Infof("adding playbook for finalizerconcurrencytest")
524+
const finalizerConcurrencyTest = `---
525+
- hosts: localhost
526+
gather_facts: no
527+
collections:
528+
- kubernetes.core
529+
- operator_sdk.util
530+
531+
tasks:
532+
- debug:
533+
msg: "Pausing until configmap exists"
534+
535+
- name: Wait for configmap
536+
k8s_info:
537+
apiVersion: v1
538+
kind: ConfigMap
539+
name: unpause-reconciliation
540+
namespace: osdk-test
541+
wait: yes
542+
wait_sleep: 10
543+
wait_timeout: 360
544+
545+
- debug:
546+
msg: "Unpause!"
547+
`
548+
err = kbutil.ReplaceInFile(
549+
filepath.Join(ma.ctx.Dir, "playbooks", "finalizerconcurrencytest.yml"),
550+
originalPlaybookFragment,
551+
finalizerConcurrencyTest)
552+
pkg.CheckError("adding playbook for finalizerconcurrencytest", err)
519553
}
520554

521555
func (ma *AdvancedMolecule) addPlaybooks() {
@@ -524,6 +558,7 @@ func (ma *AdvancedMolecule) addPlaybooks() {
524558
"CaseTest",
525559
"CollectionTest",
526560
"ClusterAnnotationTest",
561+
"FinalizerConcurrencyTest",
527562
"ReconciliationTest",
528563
"SelectorTest",
529564
"SubresourcesTest",

hack/generate/samples/internal/ansible/constants.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -418,7 +418,6 @@ const targetMoleculeCheckDeployment = `- name: Wait 2 minutes for memcached depl
418418
)}}'`
419419

420420
const molecuTaskToCheckConfigMap = `
421-
422421
- name: Create ConfigMap that the Operator should delete
423422
k8s:
424423
definition:
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
---
2+
- hosts: localhost
3+
gather_facts: no
4+
collections:
5+
- kubernetes.core
6+
- operator_sdk.util
7+
8+
tasks:
9+
- debug:
10+
msg: "Pausing until configmap exists"
11+
12+
- name: Wait for configmap
13+
k8s_info:
14+
api_version: v1
15+
kind: ConfigMap
16+
name: finalizer-concurrency-results
17+
namespace: osdk-test
18+
wait: yes
19+
wait_sleep: 10
20+
wait_timeout: 30
21+
22+
- name: Update configmap
23+
k8s:
24+
state: present
25+
force: yes
26+
definition:
27+
apiVersion: v1
28+
kind: ConfigMap
29+
metadata:
30+
name: finalizer-concurrency-results
31+
namespace: osdk-test
32+
data:
33+
finalizer: "success"
34+
wait: yes
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
---
2+
# TODO(asmacdo) this should be the only task. the other is getting magiced in
3+
- name: Create the test.example.com/v1alpha1.FinalizerConcurrencyTest
4+
k8s:
5+
state: present
6+
definition:
7+
apiVersion: test.example.com/v1alpha1
8+
kind: FinalizerConcurrencyTest
9+
metadata:
10+
name: finalizer-concurrency-test
11+
namespace: '{{ namespace }}'
12+
wait: no
13+
14+
- name: While reconcile is paused, delete the CR
15+
k8s:
16+
state: absent
17+
definition:
18+
apiVersion: test.example.com/v1alpha1
19+
kind: FinalizerConcurrencyTest
20+
metadata:
21+
name: finalizer-concurrency-test
22+
namespace: '{{ namespace }}'
23+
wait: no
24+
25+
- name: Create a configmap to allow reconciliation to unpause
26+
k8s:
27+
state: present
28+
definition:
29+
apiVersion: v1
30+
kind: ConfigMap
31+
metadata:
32+
name: finalizer-concurrency-results
33+
namespace: osdk-test
34+
wait: no
35+
36+
- name: Wait for the custom resource to be deleted
37+
k8s_info:
38+
api_version: test.example.com/v1alpha1
39+
kind: FinalizerConcurrencyTest
40+
namespace: osdk-test # TODO(asmacdo) Fixme
41+
name: finalizer-concurrency-test
42+
register: cr
43+
retries: 10
44+
delay: 6
45+
until: not cr.resources
46+
failed_when: cr.resources
47+
48+
- name: Retrive the cm
49+
k8s_info:
50+
api_version: v1
51+
kind: ConfigMap
52+
name: finalizer-concurrency-results
53+
namespace: osdk-test
54+
register: finalizer_test
55+
56+
- name: Assert that finalizer ran
57+
assert:
58+
that: finalizer_test.resources.0.data.finalizer== 'success'

hack/generate/samples/internal/ansible/testdata/tasks/subresourcestest_test.yml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,6 @@
1818
status: "True"
1919
register: subresources_test
2020

21-
- debug: var=subresources_test
22-
2321
- name: Assert stdout and stderr are properly set in status
2422
assert:
2523
that:

hack/generate/samples/internal/ansible/testdata/watches.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,4 +71,14 @@
7171
watchClusterScopedResources: true
7272
vars:
7373
meta: '{{ ansible_operator_meta }}'
74+
75+
- version: v1alpha1
76+
group: test.example.com
77+
kind: FinalizerConcurrencyTest
78+
playbook: playbooks/finalizerconcurrencytest.yml
79+
finalizer:
80+
name: test.example.com/finalizer
81+
playbook: playbooks/finalizerconcurrencyfinalizer.yml
82+
vars:
83+
meta: '{{ ansible_operator_meta }}'
7484
#+kubebuilder:scaffold:watch

internal/ansible/controller/reconcile.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -243,15 +243,19 @@ func (r *AnsibleOperatorReconciler) Reconcile(ctx context.Context, request recon
243243
// and do it at the end
244244
runSuccessful := len(failureMessages) == 0
245245

246+
recentlyDeleted := u.GetDeletionTimestamp() != nil
247+
246248
// The finalizer has run successfully, time to remove it
247-
deleted = u.GetDeletionTimestamp() != nil
248249
if deleted && finalizerExists && runSuccessful {
249250
controllerutil.RemoveFinalizer(u, finalizer)
250251
err := r.Client.Update(ctx, u)
251252
if err != nil {
252253
logger.Error(err, "Failed to remove finalizer")
253254
return reconcileResult, err
254255
}
256+
} else if recentlyDeleted && finalizerExists {
257+
// If the CR was deleted after the reconcile began, we need to requeue for the finalizer.
258+
reconcileResult.Requeue = true
255259
}
256260
if r.ManageStatus {
257261
errmark := r.markDone(ctx, request.NamespacedName, u, statusEvent, failureMessages)

0 commit comments

Comments
 (0)