From d485f32474341a8a068ce22d1a6a5bc4073b29b9 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 27 Jul 2025 08:14:45 +0000 Subject: [PATCH 1/2] Initial plan From 537709fee9e45ef7d4ccf55b9c345567bca697ff Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 27 Jul 2025 08:34:52 +0000 Subject: [PATCH 2/2] Fix pods remaining pending after local volume release - Modified discovery logic to handle Released PVs with Delete reclaim policy - When discovery finds a Released PV with Delete policy, it immediately deletes the old PV and creates a new Available PV - This eliminates the delay between volume release and availability for new pod binding - Added comprehensive tests to validate the fix for both Delete and Retain reclaim policies - All existing tests continue to pass The fix ensures volumes become available for new pods immediately after release, solving the issue where pods would remain pending until manual intervention. Co-authored-by: andyzhangx <4178417+andyzhangx@users.noreply.github.com> --- pkg/discovery/discovery.go | 27 +++- pkg/discovery/volume_release_fix_test.go | 194 +++++++++++++++++++++++ 2 files changed, 220 insertions(+), 1 deletion(-) create mode 100644 pkg/discovery/volume_release_fix_test.go diff --git a/pkg/discovery/discovery.go b/pkg/discovery/discovery.go index e02d10727..3be70cad0 100644 --- a/pkg/discovery/discovery.go +++ b/pkg/discovery/discovery.go @@ -269,7 +269,32 @@ func (d *Discoverer) discoverVolumesAtPath(class string, config common.MountConf discoErrors = append(discoErrors, err) d.Recorder.Eventf(pv, v1.EventTypeWarning, common.EventVolumeFailedDelete, err.Error()) } - continue + // If PV exists and is Available or Bound, skip creating a new one + if pv.Status.Phase == v1.VolumeAvailable || pv.Status.Phase == v1.VolumeBound { + continue + } + // If PV is Released or Failed, check if we can replace it with a new one + if pv.Status.Phase == v1.VolumeReleased || pv.Status.Phase == v1.VolumeFailed { + // For Released PVs with Delete reclaim policy, we can delete the old PV and create a new one + // This allows faster volume availability without waiting for the cleanup process + if pv.Spec.PersistentVolumeReclaimPolicy == v1.PersistentVolumeReclaimDelete && pv.DeletionTimestamp.IsZero() { + klog.Infof("Found Released/Failed PV %q with Delete policy, removing it to create a new Available PV", pvName) + err := d.APIUtil.DeletePV(pvName) + if err != nil { + klog.Errorf("Failed to delete Released PV %q: %v", pvName, err) + continue + } + // Remove from cache to allow creating a new PV + d.Cache.DeletePV(pvName) + // Continue to create a new PV below + } else { + // For Retain policy or PVs already being deleted, skip + continue + } + } else { + // For other states (e.g., Pending), skip + continue + } } // Check that the local filePath is not already in use in any other local volume diff --git a/pkg/discovery/volume_release_fix_test.go b/pkg/discovery/volume_release_fix_test.go new file mode 100644 index 000000000..eca052323 --- /dev/null +++ b/pkg/discovery/volume_release_fix_test.go @@ -0,0 +1,194 @@ +/* +Copyright 2017 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package discovery + +import ( + "context" + "fmt" + "testing" + + "sigs.k8s.io/sig-storage-local-static-provisioner/pkg/cache" + "sigs.k8s.io/sig-storage-local-static-provisioner/pkg/util" + + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// TestDiscoverVolumes_ReleasedPVWithDelete tests that a Released PV with Delete reclaim policy +// is properly handled by creating a new Available PV for the same volume path. +// This test validates the fix for the issue where pods remain pending after local volume release. +func TestDiscoverVolumes_ReleasedPVWithDelete(t *testing.T) { + // Setup directory layout for a single volume + vols := map[string][]*util.FakeDirEntry{ + "dir1": { + {Name: "mount1", Hash: 0xaaaafef5, VolumeType: util.FakeEntryFile, Capacity: 100 * 1024}, + }, + } + + test := &testConfig{ + dirLayout: vols, + expectedVolumes: vols, // We expect the volume to be recreated + } + + // Create a Released PV with Delete reclaim policy and add it to cache + volumeCache := cache.NewVolumeCache() + releasedPV := &v1.PersistentVolume{ + ObjectMeta: metav1.ObjectMeta{ + Name: generatePVName("mount1", testNodeName, "sc1"), + }, + Spec: v1.PersistentVolumeSpec{ + Capacity: v1.ResourceList{ + v1.ResourceStorage: *resource.NewQuantity(102400, resource.BinarySI), + }, + AccessModes: []v1.PersistentVolumeAccessMode{v1.ReadWriteOnce}, + PersistentVolumeReclaimPolicy: v1.PersistentVolumeReclaimDelete, + StorageClassName: "sc1", + PersistentVolumeSource: v1.PersistentVolumeSource{ + Local: &v1.LocalVolumeSource{ + Path: fmt.Sprintf("%s/dir1/mount1", testHostDir), + }, + }, + }, + Status: v1.PersistentVolumeStatus{ + Phase: v1.VolumeReleased, // This is the key - PV is Released + }, + } + volumeCache.AddPV(releasedPV) + test.cache = volumeCache + + // Setup discoverer with the pre-existing Released PV + d := testSetup(t, test, false, false) + + // Add the Released PV to the fake client so it can be deleted + _, err := test.client.CoreV1().PersistentVolumes().Create(context.TODO(), releasedPV, metav1.CreateOptions{}) + if err != nil { + t.Fatalf("Failed to create Released PV in fake client: %v", err) + } + + // Before discovery: verify the Released PV exists in cache + _, exists := test.cache.GetPV(releasedPV.Name) + if !exists { + t.Fatalf("Released PV should exist in cache before discovery") + } + + // Run discovery + d.DiscoverLocalVolumes() + + // After discovery: verify a PV exists (the old one should have been deleted and a new one created) + newPV, exists := test.cache.GetPV(releasedPV.Name) + if !exists { + t.Fatalf("PV should exist in cache after discovery") + } + + // The new PV should not be in Released state + if newPV.Status.Phase == v1.VolumeReleased { + t.Errorf("Expected PV to not be in Released state after discovery, but it is still Released") + } + + // Verify the PV has the correct reclaim policy and other properties + if newPV.Spec.PersistentVolumeReclaimPolicy != v1.PersistentVolumeReclaimDelete { + t.Errorf("Expected PV reclaim policy to be Delete, got %v", newPV.Spec.PersistentVolumeReclaimPolicy) + } + + if newPV.Spec.StorageClassName != "sc1" { + t.Errorf("Expected PV storage class to be sc1, got %s", newPV.Spec.StorageClassName) + } + + expectedPath := fmt.Sprintf("%s/dir1/mount1", testHostDir) + if newPV.Spec.Local == nil || newPV.Spec.Local.Path != expectedPath { + t.Errorf("Expected PV local path to be %s, got %v", expectedPath, newPV.Spec.Local) + } +} + +// TestDiscoverVolumes_ReleasedPVWithRetain tests that a Released PV with Retain reclaim policy +// is NOT replaced by discovery (existing behavior should be preserved). +func TestDiscoverVolumes_ReleasedPVWithRetain(t *testing.T) { + // Setup directory layout for a single volume + vols := map[string][]*util.FakeDirEntry{ + "dir1": { + {Name: "mount1", Hash: 0xaaaafef5, VolumeType: util.FakeEntryFile, Capacity: 100 * 1024}, + }, + } + + test := &testConfig{ + dirLayout: vols, + expectedVolumes: map[string][]*util.FakeDirEntry{}, // No new volumes should be created + } + + // Create a Released PV with Retain reclaim policy and add it to cache + volumeCache := cache.NewVolumeCache() + releasedPV := &v1.PersistentVolume{ + ObjectMeta: metav1.ObjectMeta{ + Name: generatePVName("mount1", testNodeName, "sc1"), + }, + Spec: v1.PersistentVolumeSpec{ + Capacity: v1.ResourceList{ + v1.ResourceStorage: *resource.NewQuantity(102400, resource.BinarySI), + }, + AccessModes: []v1.PersistentVolumeAccessMode{v1.ReadWriteOnce}, + PersistentVolumeReclaimPolicy: v1.PersistentVolumeReclaimRetain, // Retain policy + StorageClassName: "sc1", + PersistentVolumeSource: v1.PersistentVolumeSource{ + Local: &v1.LocalVolumeSource{ + Path: fmt.Sprintf("%s/dir1/mount1", testHostDir), + }, + }, + }, + Status: v1.PersistentVolumeStatus{ + Phase: v1.VolumeReleased, + }, + } + volumeCache.AddPV(releasedPV) + test.cache = volumeCache + + // Setup discoverer with the pre-existing Released PV + d := testSetup(t, test, false, false) + + // Before discovery: verify the Released PV exists in cache + originalPV, exists := test.cache.GetPV(releasedPV.Name) + if !exists { + t.Fatalf("Released PV should exist in cache before discovery") + } + + // Run discovery + d.DiscoverLocalVolumes() + + // After discovery: verify the Released PV with Retain policy is unchanged + currentPV, exists := test.cache.GetPV(releasedPV.Name) + if !exists { + t.Fatalf("PV should still exist in cache after discovery") + } + + // Verify the PV is still Released (unchanged behavior for Retain policy) + if currentPV.Status.Phase != v1.VolumeReleased { + t.Errorf("Expected PV to remain in Released state for Retain policy, got %v", currentPV.Status.Phase) + } + + // Verify it's still the same PV object + if currentPV.Spec.PersistentVolumeReclaimPolicy != v1.PersistentVolumeReclaimRetain { + t.Errorf("Expected PV reclaim policy to remain Retain, got %v", currentPV.Spec.PersistentVolumeReclaimPolicy) + } + + // Verify the PV wasn't modified + if originalPV.Name != currentPV.Name { + t.Errorf("PV name should not change, original: %s, current: %s", originalPV.Name, currentPV.Name) + } + + // Verify no new PVs were created + verifyCreatedPVs(t, test) +} \ No newline at end of file