fix uncertain cache #512
base: master
@@ -1,7 +1,6 @@
package modifycontroller

import (
	"context"
	"errors"
	"fmt"
	"testing"

@@ -14,7 +13,6 @@ import (
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/status"
	v1 "k8s.io/api/core/v1"
	storagev1beta1 "k8s.io/api/storage/v1beta1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime"
	utilfeature "k8s.io/apiserver/pkg/util/feature"

@@ -321,6 +319,65 @@ func TestInfeasibleRetry(t *testing.T) {
	}
}

// Intended to catch any race conditions in the controller
func TestConcurrentSync(t *testing.T) {
	cases := []struct {
		name      string
		waitCount int
		err       error
	}{
		// TODO: This case is flaky because the fake client lacks resourceVersion support.
		// {
		// 	name:      "success",
		// 	waitCount: 10,
		// },
		{
			name:      "uncertain",
			waitCount: 30,
			err:       nonFinalErr,
		},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			client := csi.NewMockClient(testDriverName, true, true, true, true, true, false)
			client.SetModifyError(tc.err)

			initialObjects := []runtime.Object{testVacObject, targetVacObject}
			for i := range 10 {
				initialObjects = append(initialObjects,
					&v1.PersistentVolumeClaim{
						ObjectMeta: metav1.ObjectMeta{Name: fmt.Sprintf("foo-%d", i), Namespace: pvcNamespace},
						Spec: v1.PersistentVolumeClaimSpec{
							VolumeAttributesClassName: &testVac,
							VolumeName:                fmt.Sprintf("testPV-%d", i),
						},
						Status: v1.PersistentVolumeClaimStatus{
							Phase: v1.ClaimBound,
						},
					},
					&v1.PersistentVolume{
						ObjectMeta: metav1.ObjectMeta{Name: fmt.Sprintf("testPV-%d", i)},
						Spec: v1.PersistentVolumeSpec{
							PersistentVolumeSource: v1.PersistentVolumeSource{
								CSI: &v1.CSIPersistentVolumeSource{
									Driver:       testDriverName,
									VolumeHandle: fmt.Sprintf("foo-%d", i),
								},
							},
						},
					},
				)
			}
			ctrlInstance := setupFakeK8sEnvironment(t, client, initialObjects)
			go ctrlInstance.Run(3, t.Context())

			for client.GetModifyCount() < tc.waitCount {
				time.Sleep(20 * time.Millisecond)
			}
		})
	}
}

// setupFakeK8sEnvironment creates fake K8s environment and starts Informers and ModifyController
func setupFakeK8sEnvironment(t *testing.T, client *csi.MockClient, initialObjects []runtime.Object) *modifyController {
	t.Helper()

@@ -329,11 +386,9 @@ func setupFakeK8sEnvironment(t *testing.T, client *csi.MockClient, initialObject

	/* Create fake kubeClient, Informers, and ModifyController */
	kubeClient, informerFactory := fakeK8s(initialObjects)
	pvInformer := informerFactory.Core().V1().PersistentVolumes()
	pvcInformer := informerFactory.Core().V1().PersistentVolumeClaims()
	vacInformer := informerFactory.Storage().V1beta1().VolumeAttributesClasses()

	driverName, _ := client.GetDriverName(context.TODO())
	ctx := t.Context()
	driverName, _ := client.GetDriverName(ctx)

	csiModifier, err := modifier.NewModifierFromClient(client, 15*time.Second, kubeClient, informerFactory, false, driverName)
	if err != nil {

@@ -346,26 +401,10 @@ func setupFakeK8sEnvironment(t *testing.T, client *csi.MockClient, initialObject
		workqueue.DefaultTypedControllerRateLimiter[string]())

	/* Start informers and ModifyController */
	stopCh := make(chan struct{})
	informerFactory.Start(stopCh)

	go controller.Run(1, t.Context())

	/* Add initial objects to informer caches */
[Review comment] We don't need to add them manually, as the informer will read them from the fake client.
	for _, obj := range initialObjects {
		switch obj.(type) {
		case *v1.PersistentVolume:
			pvInformer.Informer().GetStore().Add(obj)
		case *v1.PersistentVolumeClaim:
			pvcInformer.Informer().GetStore().Add(obj)
		case *storagev1beta1.VolumeAttributesClass:
			vacInformer.Informer().GetStore().Add(obj)
		default:
			t.Fatalf("Test %s: Unknown initalObject type: %+v", t.Name(), obj)
		}
	}
	informerFactory.Start(ctx.Done())

	ctrlInstance, _ := controller.(*modifyController)
	ctrlInstance := controller.(*modifyController)
	ctrlInstance.init(ctx)

	return ctrlInstance
}
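
Aside on the pattern adopted above: informers started against a fake clientset list the seed objects themselves, so the manual `GetStore().Add` calls become unnecessary. A minimal sketch of that standard client-go pattern, assuming `fake.NewSimpleClientset` seeding and a hypothetical `startInformers` helper (not code from this PR):

```go
package example

import (
	"context"

	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/client-go/informers"
	"k8s.io/client-go/kubernetes/fake"
	"k8s.io/client-go/tools/cache"
)

// startInformers seeds a fake clientset with the initial objects and lets
// the informer machinery list them, instead of writing to informer stores
// directly.
func startInformers(ctx context.Context, initialObjects []runtime.Object) informers.SharedInformerFactory {
	kubeClient := fake.NewSimpleClientset(initialObjects...)
	informerFactory := informers.NewSharedInformerFactory(kubeClient, 0 /* no resync */)
	pvcInformer := informerFactory.Core().V1().PersistentVolumeClaims()

	informerFactory.Start(ctx.Done())
	// Block until the initial list has landed in the informer cache.
	cache.WaitForCacheSync(ctx.Done(), pvcInformer.Informer().HasSynced)
	return informerFactory
}
```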
@@ -22,7 +22,6 @@ import (
	"github.com/kubernetes-csi/external-resizer/pkg/util"
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/tools/cache"
	"k8s.io/utils/ptr"
)

@@ -61,16 +60,6 @@ func (ctrl *modifyController) markControllerModifyVolumeStatus(
	if err != nil {
		return pvc, fmt.Errorf("mark PVC %q as modify volume failed, errored with: %v", pvc.Name, err)
	}
	// Remove this PVC from the uncertain cache since the status is known now
	if modifyVolumeStatus == v1.PersistentVolumeClaimModifyVolumeInfeasible {
		pvcKey, err := cache.MetaNamespaceKeyFunc(pvc)
		if err != nil {
			return pvc, err
		}

		ctrl.removePVCFromModifyVolumeUncertainCache(pvcKey)
		ctrl.markForSlowRetry(pvc, pvcKey)
[Review comment on lines -71 to -72] Already done by the caller, no need to do it again.
	}
	return updatedPVC, nil
}

@@ -144,15 +133,3 @@ func clearModifyVolumeConditions(conditions []v1.PersistentVolumeClaimCondition)
	}
	return knownConditions
}

// removePVCFromModifyVolumeUncertainCache removes the pvc from the uncertain cache
func (ctrl *modifyController) removePVCFromModifyVolumeUncertainCache(pvcKey string) {
	if ctrl.uncertainPVCs == nil {
		return
	}
	// Format of the key of the uncertainPVCs is NAMESPACE/NAME of the pvc
	_, ok := ctrl.uncertainPVCs[pvcKey]
	if ok {
		delete(ctrl.uncertainPVCs, pvcKey)
	}
}
[Review comment] After a PVC is marked as completed, it is re-queued. In the next cycle, we may still read out the PVC with InProgress status, and then we should get a conflict when trying to mark it InProgress again. However, the fake client does not support resourceVersion, so when we pass an empty patch to it, it returns the latest PVC with `ModifyVolumeStatus == nil`, which makes `markControllerModifyVolumeCompleted` panic. Should we somehow make `metadata.generation` work for PVCs? Then we could cache the already-synced generation plus VAC name, recognize that the PVC in the cache is outdated, and simply skip that cycle.
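
One known workaround for the missing resourceVersion enforcement is to inject conflicts explicitly with a reactor on the fake clientset. A hypothetical sketch (the helper name and the failing verb/resource are assumptions, not part of this PR):

```go
package example

import (
	"errors"
	"sync/atomic"

	apierrors "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/client-go/kubernetes/fake"
	k8stesting "k8s.io/client-go/testing"
)

// newConflictingClient returns a fake clientset whose first PVC update
// fails with a 409 Conflict, mimicking a stale resourceVersion.
func newConflictingClient(objects ...runtime.Object) *fake.Clientset {
	client := fake.NewSimpleClientset(objects...)
	var conflicted atomic.Bool
	client.PrependReactor("update", "persistentvolumeclaims",
		func(action k8stesting.Action) (bool, runtime.Object, error) {
			if conflicted.CompareAndSwap(false, true) {
				gr := schema.GroupResource{Resource: "persistentvolumeclaims"}
				return true, nil, apierrors.NewConflict(gr, "foo-0",
					errors.New("simulated stale resourceVersion"))
			}
			return false, nil, nil // fall through to the default object tracker
		})
	return client
}
```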
[Review comment] I am trying to understand how this would work. The `metadata.generation` field of a Kubernetes object is a sequence number that is incremented only when the object's spec changes. But `ModifyVolumeStatus` lives under `Status.ModifyVolumeStatus`, so wouldn't caching the generation number be ineffective? It wouldn't tell you whether the status has been updated, so you would still be working with stale data. Also, Hemant raised an issue, #514, that we will fix before GA.
[Review comment] My plan is: we maintain a global `sync.Map` that maps an object's UID to the generation that has been fully reconciled. We add to this map when ControllerModifyVolume finishes with OK and the spec VAC name matches the target VAC name, and we clear the entry when the object is deleted. When the next cycle starts, we just skip it if the generation matches. This should ensure we never sync the same VAC at the same generation twice. Anyway, I think this is minor and should not block this PR.
[Review comment] Yes, I think this PR should fix that one.
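
For concreteness, a minimal sketch of the generation cache proposed above; everything here is hypothetical and not part of this PR:

```go
package example

import (
	"sync"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/types"
)

// syncedGenerations maps a PVC UID to the generation that has been fully
// reconciled, i.e. ControllerModifyVolume returned OK and the spec VAC
// name matched the target VAC name.
var syncedGenerations sync.Map // types.UID -> int64

// alreadySynced reports whether this PVC's current generation was already
// reconciled, so the sync cycle can be skipped.
func alreadySynced(pvc *v1.PersistentVolumeClaim) bool {
	gen, ok := syncedGenerations.Load(pvc.UID)
	return ok && gen.(int64) == pvc.Generation
}

// markSynced records the fully reconciled generation after a successful
// ControllerModifyVolume call.
func markSynced(pvc *v1.PersistentVolumeClaim) {
	syncedGenerations.Store(pvc.UID, pvc.Generation)
}

// forget drops the entry when the PVC is deleted.
func forget(uid types.UID) {
	syncedGenerations.Delete(uid)
}
```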